To run: Beforehand:

module load pandoc

In R:

setwd("~/shared-gandalm/brain_CTP/Scripts/methylation/analysis/combine_methods/ctp_validation")
rmarkdown::render("ctp_validation_ROSMAP.Rmd", "html_document")

1 Set-up

1.1 Directories and libraries

library(tidyverse)
library(rmarkdown)    # You need this library to run this template.
library(epuRate)      # Install with remotes::install_github("holtzy/epuRate", force=TRUE) - use this instead of devtools::install_github()
library(data.table)
library(DT)
library(tidyverse)
library(dplyr)
library(reshape2)
library(uwot) # for umap
library(methylCC)
library(lme4)
library(compositions)
library(zCompositions)
library(mixOmics)
source("~/project-gandalm/software/ANCOM-v2.1/scripts/ancom_v2.1.R")
library(ALDEx2)
library(factoextra)
library(ggplot2)
library(RColorBrewer)
library(viridis)
library(ggpubr)
library(GGally)
library(lattice)
library(gridExtra)

# BiocManager::install("M3C")
# library(M3C)
#filen <- "ROSMAP_arrayMethylation_imputed"
#filen <- "ROSMAP_ComBatbatchplate_neg0"
#filen <- "ROSMAP_ComBatplate_neg0"
#filen <- "ROSMAP_SNMplate_neg0"
filen <- "ROSMAP"

# Cell-type number
cellnum <- 9
cellnum <- 7

write_out = TRUE

out_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ROSMAP/analysis"

# Read in reference object
# - use chisq statistic, 5 cell-types, bonferroni p<0.001, marker contributing >=50% of chisq stat, take top 200
# ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_ilmn450kepic_aggto100bp_cell7_bonf001_contr50pc_top200.rds"
# ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_cell5_bonf001_contr50pc_top200.rds"
#ref_dir <- "~/shared-gandalm/brain_CTP/Data/reference_cell_profile/Luo2020/Luo2020_chisq_dmr_ilmn450kepic_aggto100bp_cell7_bonf001_contr50pc_top200.rds"
#ref <- readRDS(ref_dir)

# Pheno
pheno_dir <- "~/shared-gandalm/brain_CTP/Data/pheno/ROSMAP/pheno_ROSMAP.txt"

# methylCC
# - use Luo et al. reference with DMRs identified from 450K overlap 
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_methylcc_out_n200_p1e-6.rds", sep = ""))
# - use 450K dlpfc guintivano reference (+/- randomisation of sample order)
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_methylcc_out_n200_dlpfc_450k_guintivano_hpc.rds", sep = ""))
# - use Luo et al. reference with DMRs pre-identified by Luo et al.
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_predmr_ilmn450k_celltype_na5_methylcc_dmr_n200_p1e_1.rds", sep = ""))
# - use extreme_methylation_sites.R output
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_extreme17_dmr_low0.3_high0.7.rds", sep = ""))
# - use chisq statistic, 5 cell-types, bonferroni p<0.001, marker contributing >=50% of chisq stat, take top 200
#saveRDS(methylcc_out, paste(out_dir, "/", filen, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200.rds", sep = ""))
#methylcc_dir <- paste(out_dir, "/", filen, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200.rds", sep = "")
filen0 <- "ROSMAP"
#methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_chisq_dmr_cell5_bonf001_contr50pc_top200_aggpostmcc.txt", sep = "")
#methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_chisq_dmr_ilmn450kepic_aggto100bp_c10_cell7_bonf001_contr50pc_top200_aggpostmcc.txt", sep = "")

if (cellnum == 9) {
  methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell9_split6040all_aggpostmcc.txt", sep = "")
} else if (cellnum == 7) {
  methylcc_dir <- paste(out_dir, "/", filen0, "_aut_mask_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell7_split6040all_aggpostmcc.txt", sep = "")
}

# methylCC with NeuN+/- data (Guintivano DLPFC)
methylcc_neun_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/ROSMAP/analysis/", filen, "_aut_mask_methylcc_out_n200_dlpfc_450k_guintivano_hpc.rds", sep = "")

# eBayes + Houseman
houseman_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/ROSMAP/analysis/", filen, "_aut_mask_t_ref450K_toast_houseman_ls.rds", sep = "")

# sequencing with extremes + Houseman
houseman_seq_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ROSMAP/analysis/ROSMAP_aut_mask_t_Luo2020_extremes_dmr_ilmn450kepic_aggto100bp_c10_cell7_split6040all_houseman_ls.rds"

# celfie
celfie_dir <-  "~/shared-gandalm/brain_CTP/Data/methylation/ROSMAP/analysis/celfie/ROSMAP_aut_mask_t_celfie.out/1_tissue_proportions.txt"
celfie_label_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ROSMAP/analysis/celfie/ROSMAP_aut_mask_t_celfie.in"

# ReFACToR
#refactor_dir <- "~/shared-gandalm/brain_CTP/Data/methylation/ASD_methylation_brain/analysis/KCL_R01MH094714_ASD_Illumina450K_PFC_k8_cov.refactor.components.txt"

# smartSV
filen <- "ROSMAP_ComBatplate_neg0"
smartsva_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/ROSMAP/analysis/", filen, "_aut_mask_t_SmartSVA.rds", sep  = "")
filen <- "ROSMAP"

# VAE/MethylNet
vae_dir <- "~/shared-gandalm/brain_CTP/Data/integration/methylnet/combined/full_output_latent.csv"

1.2 Read-in data

# Pheno
pheno <- read.delim(pheno_dir, header = T, as.is = T)
colnames(pheno)[which(colnames(pheno) == "individualID")] <- "IID"
colnames(pheno)[which(colnames(pheno) == "age_death")] <- "age"
pheno$age[which(pheno$age == "90+")] <- 90
pheno$age <- as.numeric(pheno$age)
colnames(pheno)[which(colnames(pheno) == "msex")] <- "sex"
pheno$sex <- ifelse(pheno$sex == 1, "M", "F")
pheno$batch <- as.factor(pheno$batch)
pheno$group <- ifelse(pheno$cogdx %in% 4:5, "AZD", "CTL")

# methylCC
methylcc_ctp <- read.delim(methylcc_dir)
ind <- grep("NonN", colnames(methylcc_ctp))
foo <- colnames(methylcc_ctp)[ind]
foo2 <- unlist(lapply(strsplit(gsub("NonN_", "", foo), "_"), function(x) x[1]))
colnames(methylcc_ctp)[ind] <- foo2
colnames(methylcc_ctp)[2:ncol(methylcc_ctp)] <- paste("mcc_", colnames(methylcc_ctp)[2:ncol(methylcc_ctp)], sep = "")
# methylcc <- readRDS(methylcc_dir)
# methylcc_ctp <- cell_counts(methylcc)
# ctp_sum <- methylcc@summary
# ctp_input <- ctp_sum$input
# colSums(ctp_input$zmat)
# Would use this to mimic Supp Fig 1
# colSums(ref$zmat) # this is the initial starting proportions

# methylCC on Guintivano DLPFC reference (NeuN+/-)
# methylcc_neun <- readRDS(methylcc_neun_dir)
# methylcc_neun_ctp <- cell_counts(methylcc_neun)
# colnames(methylcc_neun_ctp) <- c("mcc_NeuN_neg", "mcc_NeuN_pos")
# ctp_neun_sum <- methylcc_neun@summary
# ctp_neun_input <- ctp_neun_sum$input
# colSums(ctp_neun_input$zmat)

# eBayes + Houseman
houseman.ls <- readRDS(houseman_dir)
# - coefs_sub
coefs_sub <- houseman.ls$coefs_sub
colnames(coefs_sub)[2:ncol(coefs_sub)] <- paste("h_", colnames(coefs_sub)[2:ncol(coefs_sub)], sep = "")
colnames(coefs_sub)[ncol(coefs_sub)] <- "h_error_sub"
coefs_brain <- houseman.ls$coefs_brain
colnames(coefs_brain)[2:ncol(coefs_brain)] <- paste("h_", colnames(coefs_brain)[2:ncol(coefs_brain)], sep = "")
colnames(coefs_brain)[ncol(coefs_brain)] <- "h_error_brain"


# sequencing + Houseman
houseman_seq.ls <- readRDS(houseman_seq_dir)
houseman_seq <- houseman_seq.ls[[1]]
ind <- grep("NonN", colnames(houseman_seq))
foo <- colnames(houseman_seq)[ind]
foo2 <- unlist(lapply(strsplit(gsub("NonN_", "", foo), "_"), function(x) x[1]))
colnames(houseman_seq)[ind] <- foo2
colnames(houseman_seq)[2:ncol(houseman_seq)] <- paste("hseq_", colnames(houseman_seq)[2:ncol(houseman_seq)], sep = "")

# CelFIE
celfie <- fread(celfie_dir)
colnames(celfie) <- c("IID", "celfie_Exc", "celfie_Inh", "celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC", "unknown1")
#celfie_label <- fread(celfie_label_dir)

# ReFACToR
# refactor <- read.table(refactor_dir, header = T, as.is = T)
# colnames(refactor)[1] <- "IID"

# smartSVA
smartsva <- readRDS(smartsva_dir)
smartsva <- data.frame(IID = rownames(smartsva), smartsva)

if (ncol(smartsva) > 11) {
  smartsva <- smartsva[,c(1:11)]
}

# VAE, MethylNet
vae <- read.csv(vae_dir)
colnames(vae) <- c("IID", paste("VAEe", 0:9, sep = ""))

1.2.1 Join into 1 dataframe

#ctp_pheno <- inner_join(data.frame(IID = rownames(methylcc_ctp), methylcc_ctp), pheno, by = "IID")
methylcc_ctp.tmp <- methylcc_ctp
methylcc_ctp.tmp$mcc_Glial <- rowSums(methylcc_ctp.tmp[,-c(grep("IID", colnames(methylcc_ctp.tmp)), grep("Exc", colnames(methylcc_ctp.tmp)), grep("Inh", colnames(methylcc_ctp.tmp)))])
methylcc_ctp.tmp$mcc_Neuron <- rowSums(methylcc_ctp.tmp[,c(grep("Exc", colnames(methylcc_ctp.tmp)), grep("Inh", colnames(methylcc_ctp.tmp)))])
ctp_pheno <- inner_join(methylcc_ctp.tmp, pheno, by = "IID")
#ctp_pheno <- inner_join(ctp_pheno, data.frame(IID = rownames(methylcc_neun_ctp), methylcc_neun_ctp), by = "IID")
ctp_pheno <- inner_join(ctp_pheno, smartsva, by = "IID")
#ctp_pheno <- inner_join(ctp_pheno, houseman0, by = "IID")

# Houseman with array reference
ctp_pheno <- inner_join(ctp_pheno, coefs_sub, by = "IID")
ctp_pheno <- inner_join(ctp_pheno, coefs_brain, by = "IID")
ctp_pheno$h_Glial <- rowSums(ctp_pheno[,c("h_ASTRO", "h_OPC", "h_OLIGO", "h_ENDO", "h_MONO")])
ctp_pheno$h_Neuron <- rowSums(ctp_pheno[,c("h_GABA", "h_GLU")])

# Houseman with sequencing reference
ctp_pheno <- inner_join(ctp_pheno, houseman_seq, by = "IID")
ctp_pheno$hseq_Glial <- rowSums(ctp_pheno[,c("hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC")])
ctp_pheno$hseq_Neuron <- rowSums(ctp_pheno[,c("hseq_Exc", "hseq_Inh")])

# CelFIE
ctp_pheno <- inner_join(ctp_pheno, celfie, by = c("IID"))
ctp_pheno$celfie_Glial <- rowSums(ctp_pheno[,c("celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC")])
ctp_pheno$celfie_Neuron <- rowSums(ctp_pheno[,c("celfie_Exc", "celfie_Inh")])

ctp_pheno <- inner_join(ctp_pheno, vae, by = "IID")

1.2.2 Write output

if (write_out == TRUE) {
  write.table(ctp_pheno, paste(out_dir, "/", filen, "_ctp_pheno.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

1.2.3 Set variables

keep_cols <- c("IID", "group", "age", "sex", "batch", "Sample_Plate", "race", "pmi")

ctp_cols <- c(colnames(ctp_pheno)[grep("hseq", colnames(ctp_pheno))])
ctp_cols <- ctp_cols[-which(ctp_cols == "hseq_error")]
level2_cols <- ctp_cols[-which(ctp_cols %in% c("hseq_Glial", "hseq_Neuron"))]

glial_rmoligo_n <- colnames(ctp_pheno)[grep("hseq", colnames(ctp_pheno))]
glial_rmoligo_n <- glial_rmoligo_n[-which(glial_rmoligo_n %in% c("hseq_Exc", "hseq_Inh", "hseq_Oligo", "hseq_error", "hseq_Glial", "hseq_Neuron"))]
neuron_n <- c("hseq_Exc", "hseq_Inh")

2 Check baseline variable differences

  • look for confounding
  • there is confounding for age, brain bank, batch (when looking at the PFC samples only)
  • this means that accounting for covariates might also take out the ASD signal?!
# Batch
batch <- ctp_pheno %>% group_by(group, batch) %>% dplyr::summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
batch
## # A tibble: 2 × 3
##   batch   AZD   CTL
##   <fct> <int> <int>
## 1 0       100   164
## 2 1       199   252
chisq.test(batch %>% dplyr::select("AZD", "CTL"))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  batch %>% dplyr::select("AZD", "CTL")
## X-squared = 2.419, df = 1, p-value = 0.1199
# PMI
summary(lm(pmi ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = pmi ~ group, data = ctp_pheno)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.802 -3.052 -1.752  0.931 77.281 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.9545     0.3327  20.902   <2e-16 ***
## groupCTL      0.8478     0.4362   1.944   0.0523 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.753 on 713 degrees of freedom
## Multiple R-squared:  0.005271,   Adjusted R-squared:  0.003876 
## F-statistic: 3.778 on 1 and 713 DF,  p-value: 0.05233
ggplot(ctp_pheno, aes(x = pmi, colour = group, fill = group)) + geom_histogram() + facet_wrap(~group)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#chisq.test(batch %>% dplyr::select("ASD", "CTL"))

# Race
race <- ctp_pheno %>% group_by(group, race) %>% dplyr::summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
race
## # A tibble: 4 × 3
##    race   AZD   CTL
##   <int> <int> <int>
## 1     1   292   406
## 2     2     7     8
## 3     3    NA     1
## 4     7    NA     1
#chisq.test(batch %>% dplyr::select("ASD", "CTL"))

# Sex
sex <- ctp_pheno %>% group_by(group, sex) %>% dplyr::summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
sex
## # A tibble: 2 × 3
##   sex     AZD   CTL
##   <chr> <int> <int>
## 1 F       199   255
## 2 M       100   161
chisq.test(sex  %>% filter(sex %in% c("M", "F")) %>% dplyr::select("AZD", "CTL"))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  sex %>% filter(sex %in% c("M", "F")) %>% dplyr::select("AZD",     "CTL")
## X-squared = 1.8537, df = 1, p-value = 0.1734
# Age
summary(lm(age ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = age ~ group, data = ctp_pheno)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.225  -2.246   1.983   2.553   4.782 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  87.8331     0.2650 331.499  < 2e-16 ***
## groupCTL     -2.6152     0.3474  -7.529 1.55e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.582 on 713 degrees of freedom
## Multiple R-squared:  0.07364,    Adjusted R-squared:  0.07234 
## F-statistic: 56.68 on 1 and 713 DF,  p-value: 1.552e-13
ggplot(ctp_pheno, aes(x = age, colour = group, fill = group)) + geom_histogram() + facet_wrap(~group)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Education
summary(lm(educ ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = educ ~ group, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3712  -2.4832  -0.3712   2.5168  11.6288 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  16.3712     0.2071  79.067   <2e-16 ***
## groupCTL      0.1119     0.2715   0.412     0.68    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.58 on 713 degrees of freedom
## Multiple R-squared:  0.0002384,  Adjusted R-squared:  -0.001164 
## F-statistic:  0.17 on 1 and 713 DF,  p-value: 0.6802
ggplot(ctp_pheno, aes(x = educ, colour = group, fill = group)) + geom_histogram() + facet_wrap(~group)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Braak stage
braaksc <- ctp_pheno %>% group_by(group, braaksc) %>% dplyr::summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
braaksc
## # A tibble: 7 × 3
##   braaksc   AZD   CTL
##     <int> <int> <int>
## 1       0    NA     9
## 2       1    12    50
## 3       2    11    61
## 4       3    65   147
## 5       4    82   119
## 6       5   122    30
## 7       6     7    NA
# CERAD score (neuritic plaques)
ceradsc <- ctp_pheno %>% group_by(group, ceradsc) %>% dplyr::summarise(n=n()) %>% spread(group, n)
## `summarise()` has grouped output by 'group'. You can override using the `.groups` argument.
ceradsc
## # A tibble: 4 × 3
##   ceradsc   AZD   CTL
##     <int> <int> <int>
## 1       1   149    68
## 2       2   103   136
## 3       3    20    55
## 4       4    27   157

3 Check association of sSVs and batch effects

tmp.long <- ctp_pheno %>% dplyr::select(c("IID", "batch", "pmi", "Sample_Plate", "hseq_Glial", "sSV1", "sSV2", "sSV3", "sSV4")) %>% melt(id.vars = c("IID", "batch", "pmi", "Sample_Plate", "hseq_Glial"), variable.name = "sSV_num", value.name = "sSV")

ggplot(tmp.long, aes(x = batch, y = sSV)) + geom_boxplot() + facet_wrap(~ sSV_num, scales = "free_y")

ggplot(tmp.long, aes(x = pmi, y = sSV, colour = batch)) + geom_point() + facet_wrap(~ sSV_num)

ggplot(tmp.long, aes(x = Sample_Plate, y = sSV, colour = batch)) + geom_boxplot() + facet_wrap(~ sSV_num)

ggplot(tmp.long, aes(x = hseq_Glial, y = sSV, colour = batch)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ sSV_num)
## `geom_smooth()` using formula 'y ~ x'

4 Check association of VAE embeddings and batch effects

tmp.long <- ctp_pheno %>% dplyr::select(c("IID", "batch", "pmi", "age", "sex", "group", "Sample_Plate", "hseq_Glial", "VAEe0", "VAEe1", "VAEe2", "VAEe3", "VAEe4", "VAEe5", "VAEe6", "VAEe7", "VAEe8", "VAEe9")) %>% melt(id.vars = c("IID", "batch", "pmi", "age", "sex", "group", "Sample_Plate", "hseq_Glial"), variable.name = "VAEe", value.name = "embedding")

ggplot(tmp.long, aes(x = batch, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = age, y = embedding, colour = batch)) + geom_point() + facet_wrap(~ VAEe)

ggplot(tmp.long, aes(x = pmi, y = embedding, colour = batch)) + geom_point() + facet_wrap(~ VAEe)

ggplot(tmp.long, aes(x = Sample_Plate, y = embedding, colour = batch)) + geom_boxplot() + facet_wrap(~ VAEe)

ggplot(tmp.long, aes(x = sex, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe)

ggplot(tmp.long, aes(x = group, y = embedding)) + geom_boxplot() + facet_wrap(~ VAEe, scales = "free_y")

ggplot(tmp.long, aes(x = hseq_Glial, y = embedding, colour = batch)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ VAEe)
## `geom_smooth()` using formula 'y ~ x'

5 Boxplots - raw

Add analysis info on

houseman_seq.long <- melt(houseman_seq, variable.name = "celltype", value.name = "seq_houseman_CTP")
## Using IID as id variables
houseman_seq.gg <- ggplot(houseman_seq.long, aes(x = celltype, y = seq_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

methylcc_ctp.long <- melt(methylcc_ctp, variable.name = "celltype", value.name = "methylcc_CTP")
## Using IID as id variables
methylcc_ctp.gg <- ggplot(methylcc_ctp.long, aes(x = celltype, y = methylcc_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))
#+ theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1))

coefs_sub.long <- melt(coefs_sub, variable.name = "celltype", value.name = "eBayes_houseman_CTP") %>% mutate(MatchCell = match(celltype, c("h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")))
## Using IID as id variables
coefs_sub.gg <- coefs_sub.long %>% mutate(celltype = fct_reorder(celltype, MatchCell)) %>% ggplot(aes(x = celltype, y = eBayes_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

coefs_brain.long <- melt(coefs_brain, variable.name = "celltype", value.name = "eBayes_houseman_CTP")
## Using IID as id variables
coefs_brain.gg <- ggplot(coefs_brain.long, aes(x = celltype, y = eBayes_houseman_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

smartsva.long <- melt(smartsva, variable.name = "celltype", value.name = "smartsva_CTP")
## Using IID as id variables
smartsva.gg <- ggplot(smartsva.long, aes(x = celltype, y = smartsva_CTP, colour = celltype)) + geom_boxplot()  + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

vae.long <- melt(vae, id.vars = c("IID"), variable.name = "celltype", value.name = "VAE_embed")
vae.gg <- ggplot(vae.long, aes(x = celltype, y = VAE_embed, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

celfie.long <- melt(celfie, id.vars = c("IID"), variable.name = "celltype", value.name = "CelFIE_CTP")
celfie.gg <- ggplot(celfie.long, aes(x = celltype, y = CelFIE_CTP, colour = celltype)) + geom_boxplot() + theme(legend.position = "none", axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + ylim(c(0,1))

# Arrange into 1 plot
#ggarrange(houseman_seq.gg, methylcc_ctp.gg, coefs_sub.gg, coefs_brain.gg, jaffe.gg, celfie.gg, smartsva.gg, nrow = 3, ncol = 2)

ggarrange(houseman_seq.gg, methylcc_ctp.gg, coefs_sub.gg, coefs_brain.gg, celfie.gg, smartsva.gg, nrow = 3, ncol = 2)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

5.1 houseman_seq

# - boxplot
hseq_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), all_of(ctp_cols)))

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.tmp, paste(out_dir, "/hseq_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
hseq_ctp_pheno.long <- melt(hseq_ctp_pheno.tmp, measure.vars = c(all_of(ctp_cols)), variable.name = "celltype", value.name = "CTP")
hseq_ctp_pheno.long$Level <- ifelse(hseq_ctp_pheno.long$celltype %in% c("hseq_Neuron", "hseq_Glial"), "Level1", "Level2")
hseq_ctp_pheno.long$Region <- "PFC"
hseq_ctp_pheno.long$celltype <- gsub("hseq_", "", hseq_ctp_pheno.long$celltype)
hseq_ctp_pheno.long$celltype <- unlist(lapply(strsplit(hseq_ctp_pheno.long$celltype, "_"), function(x) x[1]))
hseq_ctp_pheno.long$MatchCell <- match(hseq_ctp_pheno.long$celltype, rev(c("Exc", "Inh", "Astro", "Micro", "Endo", "Oligo", "OPC", "Neuron", "Glial")))
hseq_ctp_pheno.long %>% mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
  ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.1.1 Adjust for oligodendrocyte proportion

# New adjustment which only changes neuronal populations
hseq_ctp_pheno_adjoligo.tmp <- hseq_ctp_pheno.tmp %>%
  mutate(sum_neuro_oligo = hseq_Exc + hseq_Inh + hseq_Oligo) %>%
  mutate(Exc_Inh_ratio = (hseq_Exc/(hseq_Exc + hseq_Inh))) %>%
  mutate(Inh_Exc_ratio = (hseq_Inh/(hseq_Exc + hseq_Inh))) %>%
  mutate(hseq_Exc = Exc_Inh_ratio * sum_neuro_oligo) %>%
  mutate(hseq_Inh = Inh_Exc_ratio * sum_neuro_oligo) %>%
  dplyr::select(-c("hseq_Oligo", "hseq_Neuron", "hseq_Glial"))

# Old adjustment which changed ALL CTPs
# methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo <- methylcc_ctp_pheno_adjoligo.tmp$Exc + methylcc_ctp_pheno_adjoligo.tmp $Inh + methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R + methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP
# methylcc_ctp_pheno_adjoligo.tmp$Exc <- methylcc_ctp_pheno_adjoligo.tmp$Exc/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$Inh <- methylcc_ctp_pheno_adjoligo.tmp$Inh/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo

hseq_ctp_pheno_adjoligo.tmp$hseq_Neuron <- rowSums(hseq_ctp_pheno_adjoligo.tmp[,neuron_n])
hseq_ctp_pheno_adjoligo.tmp$hseq_Glial <- rowSums(hseq_ctp_pheno_adjoligo.tmp[,glial_rmoligo_n])

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno_adjoligo.tmp, paste(out_dir, "/hseq_adjoligo_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
hseq_ctp_pheno_adjoligo.long <- melt(hseq_ctp_pheno_adjoligo.tmp, measure.vars = c(all_of(neuron_n), all_of(glial_rmoligo_n), "hseq_Neuron", "hseq_Glial"), variable.name = "celltype", value.name = "CTP")
hseq_ctp_pheno_adjoligo.long$Level <- ifelse(hseq_ctp_pheno_adjoligo.long$celltype %in% c("hseq_Neuron", "hseq_Glial"), "Level1", "Level2")
hseq_ctp_pheno_adjoligo.long$Region <- "PFC"
hseq_ctp_pheno_adjoligo.long$celltype <- gsub("hseq_", "", hseq_ctp_pheno_adjoligo.long$celltype)

hseq_ctp_pheno_adjoligo.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "OPC"))))) %>%
  ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.2 methylCC

ctp_cols_mcc <- gsub("hseq", "mcc", ctp_cols)
neuron_n_mcc <- gsub("hseq", "mcc", neuron_n)
glial_rmoligo_n_mcc <- gsub("hseq", "mcc", glial_rmoligo_n)

# - boxplot
methylcc_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), all_of(ctp_cols_mcc)))

if (write_out == TRUE) {
  write.table(methylcc_ctp_pheno.tmp, paste(out_dir, "/methylcc_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
methylcc_ctp_pheno.long <- melt(methylcc_ctp_pheno.tmp, measure.vars = c(all_of(ctp_cols_mcc)), variable.name = "celltype", value.name = "CTP")
methylcc_ctp_pheno.long$Level <- ifelse(methylcc_ctp_pheno.long$celltype %in% c("mcc_Neuron", "mcc_Glial"), "Level1", "Level2")
methylcc_ctp_pheno.long$Region <- "PFC"
methylcc_ctp_pheno.long$celltype <- gsub("mcc_", "", methylcc_ctp_pheno.long$celltype)

methylcc_ctp_pheno.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "Oligo", "OPC"))))) %>%
ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.2.1 Adjust for oligodendrocyte proportion

# New adjustment which only changes neuronal populations
methylcc_ctp_pheno_adjoligo.tmp <- methylcc_ctp_pheno.tmp %>%
  mutate(sum_neuro_oligo = mcc_Exc + mcc_Inh + mcc_Oligo) %>%
  mutate(Exc_Inh_ratio = (mcc_Exc/(mcc_Exc + mcc_Inh))) %>%
  mutate(Inh_Exc_ratio = (mcc_Inh/(mcc_Exc + mcc_Inh))) %>%
  mutate(mcc_Exc = Exc_Inh_ratio * sum_neuro_oligo) %>%
  mutate(mcc_Inh = Inh_Exc_ratio * sum_neuro_oligo) %>%
  dplyr::select(-c("mcc_Oligo", "mcc_Neuron", "mcc_Glial"))

# Old adjustment which changed ALL CTPs
# methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo <- methylcc_ctp_pheno_adjoligo.tmp$Exc + methylcc_ctp_pheno_adjoligo.tmp $Inh + methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R + methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP
# methylcc_ctp_pheno_adjoligo.tmp$Exc <- methylcc_ctp_pheno_adjoligo.tmp$Exc/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$Inh <- methylcc_ctp_pheno_adjoligo.tmp$Inh/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Astro_FGF3R/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo
# methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP <- methylcc_ctp_pheno_adjoligo.tmp$NonN_Micro.Endo_TYROBP/methylcc_ctp_pheno_adjoligo.tmp$sum_nonoligo

methylcc_ctp_pheno_adjoligo.tmp$mcc_Neuron <- rowSums(methylcc_ctp_pheno_adjoligo.tmp[,neuron_n_mcc])
methylcc_ctp_pheno_adjoligo.tmp$mcc_Glial <- rowSums(methylcc_ctp_pheno_adjoligo.tmp[,glial_rmoligo_n_mcc])

if (write_out == TRUE) {
  write.table(methylcc_ctp_pheno_adjoligo.tmp, paste(out_dir, "/methylcc_adjoligo_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
methylcc_ctp_pheno_adjoligo.long <- melt(methylcc_ctp_pheno_adjoligo.tmp, measure.vars = c(all_of(neuron_n_mcc), all_of(glial_rmoligo_n_mcc), "mcc_Neuron", "mcc_Glial"), variable.name = "celltype", value.name = "CTP")
methylcc_ctp_pheno_adjoligo.long$Level <- ifelse(methylcc_ctp_pheno_adjoligo.long$celltype %in% c("mcc_Neuron", "mcc_Glial"), "Level1", "Level2")
methylcc_ctp_pheno_adjoligo.long$Region <- "PFC"
methylcc_ctp_pheno_adjoligo.long$celltype <- gsub("mcc_", "", methylcc_ctp_pheno_adjoligo.long$celltype)

methylcc_ctp_pheno_adjoligo.long %>% mutate(celltype = fct_reorder(celltype, match(celltype, rev(c("Exc", "Inh", "Astro", "Endo", "Micro", "OPC"))))) %>%
ggplot(aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

5.3 coefs_sub

# - boxplot
coefs_sub_ctp_pheno.tmp <- ctp_pheno %>% dplyr::select(c(all_of(keep_cols), "h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OPC", "h_OLIGO"))
coefs_sub_ctp_pheno.tmp$neuron_sum <- coefs_sub_ctp_pheno.tmp$h_GABA + coefs_sub_ctp_pheno.tmp$h_GLU
coefs_sub_ctp_pheno.tmp$glia_sum <- coefs_sub_ctp_pheno.tmp$h_ASTRO + coefs_sub_ctp_pheno.tmp$h_OPC + coefs_sub_ctp_pheno.tmp$h_OLIGO + coefs_sub_ctp_pheno.tmp$h_ENDO + coefs_sub_ctp_pheno.tmp$h_MONO

if (write_out == TRUE) {
  write.table(coefs_sub_ctp_pheno.tmp, paste(out_dir, "/coefs_sub_ctp_pheno.raw.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}
coefs_sub_ctp_pheno.long <- melt(coefs_sub_ctp_pheno.tmp, measure.vars = c("h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OPC", "h_OLIGO", "neuron_sum", "glia_sum"), variable.name = "celltype", value.name = "CTP")
coefs_sub_ctp_pheno.long$Level <- ifelse(coefs_sub_ctp_pheno.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
coefs_sub_ctp_pheno.long$Region <- "PFC"

ggplot(coefs_sub_ctp_pheno.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +   
    facet_grid(Level~Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6 Boxplots: clr-transformation (2 ways to handle zeros)

  • Justification: does it make sense to run an ANOVA on CTP ~ group, if the CTPs are related?
  • Eg. what does variance analysis look like when the y-variable for each individual is non-independent?

6.1 houseman_seq

6.1.1 clr-transformation with z-compositions to deal with zeros

  • IDs go as columns, CTPs as rows for zCompositions cmultRepl(), and IDs as rows for the clr(?
keep_pheno_col <- hseq_ctp_pheno.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno.tmp.clr <- as.matrix(hseq_ctp_pheno.tmp %>% dplyr::select(c(all_of(level2_cols))))

# Apply zCompositions to empirically get the best replacement of zeros
# - it looks like you have to transpose the matrix (with samples as columns) to do this properly (?)
length(which(hseq_ctp_pheno.tmp.clr == 0)) # check there are no 0s
## [1] 2
check <- which(hseq_ctp_pheno.tmp.clr == 0, arr.ind = T)

hseq_ctp_pheno.tmp.clr_t <- t(hseq_ctp_pheno.tmp.clr)

if (length(which(hseq_ctp_pheno.tmp.clr == 0)) > 0) {
  
  check <- which(hseq_ctp_pheno.tmp.clr_t == 0, arr.ind = T)
  hseq_ctp_pheno.tmp.clr0_t <- cmultRepl(hseq_ctp_pheno.tmp.clr_t, output = "p-counts")
  hseq_ctp_pheno.tmp.clr0_t[check]

} else {
  
  print("no zcompositions applied as no 0 values")
  hseq_ctp_pheno.tmp.clr0_t <- hseq_ctp_pheno.tmp.clr_t
  
}
## No. corrected values:  2
## [1] 0.003802408 0.003950411
# Perform clr-transform
hseq_ctp_pheno.tmp.clr0.tr <- clr(t(hseq_ctp_pheno.tmp.clr_t))
hseq_ctp_pheno.clr.zc <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.clr.zc, paste(out_dir, "/hseq_ctp_pheno.clr.zc.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno.clr.zc.long <- melt(hseq_ctp_pheno.clr.zc, measure.vars = c(all_of(level2_cols)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno.clr.zc.long$Region <- "PFC"
ggplot(hseq_ctp_pheno.clr.zc.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("")

  ggtitle("zCompositions")
## $title
## [1] "zCompositions"
## 
## attr(,"class")
## [1] "labels"
    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

6.1.2 clr-transformation with 1e-3 minimum

  • Justification: does it make sense to run an ANOVA on CTP ~ group, if the CTPs are related?

  • Eg. what does variance analysis look like when the y-variable for each individual is non-independent?

  • N.B. IDs go as columns, CTPs as rows

keep_pheno_col <- hseq_ctp_pheno.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno.tmp.clr <- as.matrix(hseq_ctp_pheno.tmp %>% dplyr::select(c(all_of(level2_cols))))

# Make minimum CTP = 1e-3
length(which(hseq_ctp_pheno.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 4
hseq_ctp_pheno.tmp.clr0 <- hseq_ctp_pheno.tmp.clr
hseq_ctp_pheno.tmp.clr0[which(hseq_ctp_pheno.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
hseq_ctp_pheno.tmp.clr0.tr <- clr(hseq_ctp_pheno.tmp.clr0)
hseq_ctp_pheno.clr.1e3 <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno.clr.1e3, paste(out_dir, "/hseq_ctp_pheno.clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno.clr.1e3.long <- melt(hseq_ctp_pheno.clr.1e3, measure.vars = c(all_of(level2_cols)), variable.name = "celltype", value.name = "CTP")
hseq_ctp_pheno.clr.1e3.long$Region <- "PFC"
ggplot(hseq_ctp_pheno.clr.1e3.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  ggtitle("Minimum value = 1e-3")

6.1.2.1 Adjust for oligodendrocyte proportion

keep_pheno_col <- hseq_ctp_pheno_adjoligo.tmp %>% dplyr::select(all_of(keep_cols))
hseq_ctp_pheno_adjoligo.tmp.clr <- as.matrix(hseq_ctp_pheno_adjoligo.tmp %>% dplyr::select(c(all_of(neuron_n), all_of(glial_rmoligo_n))))

# Make minimum CTP = 1e-3
length(which(hseq_ctp_pheno_adjoligo.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 4
hseq_ctp_pheno_adjoligo.tmp.clr0 <- hseq_ctp_pheno_adjoligo.tmp.clr
hseq_ctp_pheno_adjoligo.tmp.clr0[which(hseq_ctp_pheno_adjoligo.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
hseq_ctp_pheno_adjoligo.tmp.clr0.tr <- clr(hseq_ctp_pheno_adjoligo.tmp.clr0)
hseq_ctp_pheno_adjoligo.clr.1e3 <- data.frame(keep_pheno_col, hseq_ctp_pheno_adjoligo.tmp.clr0.tr)

if (write_out == TRUE) {
  write.table(hseq_ctp_pheno_adjoligo.clr.1e3, paste(out_dir, "/hseq_adjoligo_ctp_pheno.clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

Make dataframe

hseq_ctp_pheno_adjoligo.clr.1e3.long <- melt(hseq_ctp_pheno_adjoligo.clr.1e3, measure.vars = c(all_of(neuron_n), all_of(glial_rmoligo_n)), variable.name = "celltype", value.name = "CTP")
#methylcc_ctp_pheno.clr.zc.long$Level <- ifelse(methylcc_ctp_pheno.clr.zc.long$celltype %in% c("neuron_sum", "glia_sum"), "Level1", "Level2")
hseq_ctp_pheno_adjoligo.clr.1e3.long$Region <- "PFC"
ggplot(hseq_ctp_pheno_adjoligo.clr.1e3.long, aes(x=celltype, y = CTP, fill = group)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  ggtitle("Minimum value = 1e-3")

    #facet_grid(Region, scales='free', space = 'free_y') + coord_flip() + theme_bw() + xlab("")

7 Model: per-cell-type, compare raw

if (cellnum == 7) {
  
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group, data = hseq_ctp_pheno.tmp))

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group + age + sex + batch + race + pmi, data = hseq_ctp_pheno.tmp))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group, data = methylcc_ctp_pheno.tmp))

  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group + age + sex + batch + race + pmi, data = methylcc_ctp_pheno.tmp))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.107495 -0.018705 -0.000705  0.019351  0.103232 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.2038934  0.0233193   8.744  < 2e-16 ***
## groupCTL    -0.0076950  0.0024354  -3.160 0.001647 ** 
## age          0.0004897  0.0002560   1.913 0.056194 .  
## sexM        -0.0094710  0.0024344  -3.891 0.000109 ***
## batch1      -0.0090001  0.0023925  -3.762 0.000183 ***
## race         0.0036690  0.0041822   0.877 0.380633    
## pmi          0.0001779  0.0002006   0.887 0.375441    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03075 on 708 degrees of freedom
## Multiple R-squared:  0.06801,    Adjusted R-squared:  0.06011 
## F-statistic: 8.611 on 6 and 708 DF,  p-value: 4.688e-09
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.066291 -0.011975  0.000034  0.012372  0.056259 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.673e-01  1.366e-02  12.246   <2e-16 ***
## groupCTL    -2.709e-03  1.427e-03  -1.899    0.058 .  
## age         -7.524e-05  1.500e-04  -0.502    0.616    
## sexM         2.067e-03  1.426e-03   1.449    0.148    
## batch1       1.250e-02  1.402e-03   8.918   <2e-16 ***
## race         3.055e-03  2.450e-03   1.247    0.213    
## pmi          1.157e-04  1.175e-04   0.985    0.325    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01801 on 708 degrees of freedom
## Multiple R-squared:  0.1129, Adjusted R-squared:  0.1054 
## F-statistic: 15.02 on 6 and 708 DF,  p-value: 3.199e-16
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.066687 -0.013257  0.000935  0.013897  0.081743 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1721458  0.0163100  10.555  < 2e-16 ***
## groupCTL     0.0033298  0.0017033   1.955  0.05099 .  
## age         -0.0000502  0.0001791  -0.280  0.77933    
## sexM         0.0026255  0.0017026   1.542  0.12352    
## batch1      -0.0108697  0.0016734  -6.496 1.56e-10 ***
## race         0.0041251  0.0029251   1.410  0.15891    
## pmi         -0.0004393  0.0001403  -3.131  0.00181 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02151 on 708 degrees of freedom
## Multiple R-squared:  0.08521,    Adjusted R-squared:  0.07745 
## F-statistic: 10.99 on 6 and 708 DF,  p-value: 9.917e-12
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0136082 -0.0029819 -0.0000122  0.0028257  0.0164040 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.226e-02  3.440e-03   9.378  < 2e-16 ***
## groupCTL     1.651e-03  3.593e-04   4.595 5.12e-06 ***
## age         -4.307e-05  3.777e-05  -1.140   0.2546    
## sexM        -8.341e-04  3.591e-04  -2.323   0.0205 *  
## batch1       2.264e-03  3.529e-04   6.416 2.56e-10 ***
## race         8.428e-04  6.170e-04   1.366   0.1723    
## pmi         -4.548e-05  2.959e-05  -1.537   0.1248    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004536 on 708 degrees of freedom
## Multiple R-squared:  0.09086,    Adjusted R-squared:  0.08315 
## F-statistic: 11.79 on 6 and 708 DF,  p-value: 1.253e-12
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.016100 -0.003552 -0.000580  0.003322  0.034024 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.766e-02  4.379e-03   6.316 4.73e-10 ***
## groupCTL     6.617e-04  4.574e-04   1.447    0.148    
## age         -5.379e-05  4.809e-05  -1.119    0.264    
## sexM         8.449e-04  4.572e-04   1.848    0.065 .  
## batch1       2.122e-03  4.493e-04   4.723 2.80e-06 ***
## race         2.054e-04  7.854e-04   0.262    0.794    
## pmi         -2.887e-05  3.767e-05  -0.766    0.444    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005775 on 708 degrees of freedom
## Multiple R-squared:  0.0417, Adjusted R-squared:  0.03358 
## F-statistic: 5.134 on 6 and 708 DF,  p-value: 3.54e-05
## 
## 
## Response hseq_Oligo :
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.141722 -0.034924 -0.006579  0.026341  0.234953 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.3194024  0.0378681   8.435   <2e-16 ***
## groupCTL     0.0047160  0.0039548   1.192   0.2335    
## age         -0.0002076  0.0004158  -0.499   0.6177    
## sexM         0.0051453  0.0039532   1.302   0.1935    
## batch1      -0.0038878  0.0038852  -1.001   0.3173    
## race        -0.0134220  0.0067915  -1.976   0.0485 *  
## pmi          0.0002422  0.0003258   0.744   0.4574    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04994 on 708 degrees of freedom
## Multiple R-squared:  0.01342,    Adjusted R-squared:  0.00506 
## F-statistic: 1.605 on 6 and 708 DF,  p-value: 0.1429
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0089936 -0.0021805 -0.0000361  0.0020671  0.0142636 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.948e-03  2.569e-03   2.315   0.0209 *  
## groupCTL     2.833e-04  2.683e-04   1.056   0.2914    
## age          2.225e-05  2.821e-05   0.789   0.4306    
## sexM        -6.107e-05  2.682e-04  -0.228   0.8200    
## batch1       4.260e-03  2.636e-04  16.161   <2e-16 ***
## race         9.813e-05  4.608e-04   0.213   0.8314    
## pmi         -4.323e-05  2.210e-05  -1.956   0.0509 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003388 on 708 degrees of freedom
## Multiple R-squared:  0.2709, Adjusted R-squared:  0.2648 
## F-statistic: 43.85 on 6 and 708 DF,  p-value: < 2.2e-16

7.1 Test aggregated sums

summary(lm(hseq_Glial ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = hseq_Glial ~ group, data = ctp_pheno)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.109356 -0.026825 -0.002075  0.023073  0.179581 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.516203   0.002347 219.939  < 2e-16 ***
## groupCTL    0.011893   0.003077   3.865 0.000121 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04058 on 713 degrees of freedom
## Multiple R-squared:  0.02052,    Adjusted R-squared:  0.01915 
## F-statistic: 14.94 on 1 and 713 DF,  p-value: 0.0001212
summary(lm(mcc_Glial ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = mcc_Glial ~ group, data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29512 -0.08984 -0.00749  0.08323  0.35525 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.456096   0.007127  63.996  < 2e-16 ***
## groupCTL    0.024421   0.009344   2.614  0.00915 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1232 on 713 degrees of freedom
## Multiple R-squared:  0.00949,    Adjusted R-squared:  0.008101 
## F-statistic: 6.831 on 1 and 713 DF,  p-value: 0.009147
summary(lm(h_NeuN_neg ~ group, data = ctp_pheno))
## 
## Call:
## lm(formula = h_NeuN_neg ~ group, data = ctp_pheno)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.157943 -0.035746 -0.000064  0.033708  0.192851 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.427636   0.003085 138.599  < 2e-16 ***
## groupCTL    0.014123   0.004045   3.491  0.00051 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05335 on 713 degrees of freedom
## Multiple R-squared:  0.01681,    Adjusted R-squared:  0.01543 
## F-statistic: 12.19 on 1 and 713 DF,  p-value: 0.0005102
summary(lm(hseq_Glial ~ group + age + sex + batch + race + pmi, data = ctp_pheno))
## 
## Call:
## lm(formula = hseq_Glial ~ group + age + sex + batch + race + 
##     pmi, data = ctp_pheno)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.112344 -0.026521 -0.001446  0.024434  0.191534 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.5574177  0.0305564  18.242  < 2e-16 ***
## groupCTL     0.0106417  0.0031912   3.335 0.000898 ***
## age         -0.0003325  0.0003355  -0.991 0.322078    
## sexM         0.0077205  0.0031899   2.420 0.015758 *  
## batch1      -0.0061108  0.0031350  -1.949 0.051666 .  
## race        -0.0081505  0.0054802  -1.487 0.137386    
## pmi         -0.0003146  0.0002629  -1.197 0.231716    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04029 on 708 degrees of freedom
## Multiple R-squared:  0.04126,    Adjusted R-squared:  0.03314 
## F-statistic: 5.078 on 6 and 708 DF,  p-value: 4.076e-05
summary(lm(mcc_Glial ~ group + age + sex + batch + race + pmi, data = ctp_pheno))
## 
## Call:
## lm(formula = mcc_Glial ~ group + age + sex + batch + race + pmi, 
##     data = ctp_pheno)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31660 -0.08489  0.00118  0.07883  0.37955 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.6040468  0.0923888   6.538 1.19e-10 ***
## groupCTL     0.0181389  0.0096487   1.880   0.0605 .  
## age         -0.0013827  0.0010144  -1.363   0.1733    
## sexM         0.0091320  0.0096447   0.947   0.3440    
## batch1      -0.0401530  0.0094789  -4.236 2.58e-05 ***
## race        -0.0011935  0.0165695  -0.072   0.9426    
## pmi         -0.0002321  0.0007948  -0.292   0.7704    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1218 on 708 degrees of freedom
## Multiple R-squared:  0.03878,    Adjusted R-squared:  0.03063 
## F-statistic:  4.76 on 6 and 708 DF,  p-value: 9.075e-05
summary(lm(h_NeuN_neg ~ group + age + sex + batch + race + pmi, data = ctp_pheno))
## 
## Call:
## lm(formula = h_NeuN_neg ~ group + age + sex + batch + race + 
##     pmi, data = ctp_pheno)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.163478 -0.035515 -0.001043  0.033275  0.188151 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.5180070  0.0391117  13.244  < 2e-16 ***
## groupCTL     0.0100405  0.0040847   2.458  0.01421 *  
## age         -0.0009043  0.0004294  -2.106  0.03557 *  
## sexM         0.0131519  0.0040830   3.221  0.00134 ** 
## batch1      -0.0231003  0.0040128  -5.757 1.28e-08 ***
## race         0.0032781  0.0070145   0.467  0.64041    
## pmi         -0.0004772  0.0003365  -1.418  0.15657    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05157 on 708 degrees of freedom
## Multiple R-squared:  0.08766,    Adjusted R-squared:  0.07993 
## F-statistic: 11.34 on 6 and 708 DF,  p-value: 4.045e-12

7.2 Adjust for oligodendrocyte proportion

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group, data = hseq_ctp_pheno_adjoligo.tmp))
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group + age + sex + batch + race + pmi, data = hseq_ctp_pheno_adjoligo.tmp))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group, data = methylcc_ctp_pheno_adjoligo.tmp))
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group + age + sex + batch + race + pmi, data = methylcc_ctp_pheno_adjoligo.tmp))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.082089 -0.020570 -0.000001  0.020986  0.128824 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.3781132  0.0248053  15.243  < 2e-16 ***
## groupCTL    -0.0056948  0.0025906  -2.198   0.0283 *  
## age          0.0005690  0.0002724   2.089   0.0371 *  
## sexM        -0.0101841  0.0025895  -3.933 9.22e-05 ***
## batch1      -0.0189395  0.0025450  -7.442 2.88e-13 ***
## race        -0.0040852  0.0044487  -0.918   0.3588    
## pmi          0.0002863  0.0002134   1.342   0.1801    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03271 on 708 degrees of freedom
## Multiple R-squared:  0.1099, Adjusted R-squared:  0.1023 
## F-statistic: 14.56 on 6 and 708 DF,  p-value: 1.024e-15
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07570 -0.01805 -0.00226  0.01513  0.12163 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.125e-01  2.111e-02  14.800  < 2e-16 ***
## groupCTL     6.328e-06  2.205e-03   0.003 0.997711    
## age         -3.621e-04  2.318e-04  -1.562 0.118733    
## sexM         7.925e-03  2.204e-03   3.596 0.000346 ***
## batch1       1.855e-02  2.166e-03   8.564  < 2e-16 ***
## race        -2.613e-03  3.786e-03  -0.690 0.490439    
## pmi          2.495e-04  1.816e-04   1.374 0.169882    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02784 on 708 degrees of freedom
## Multiple R-squared:  0.1186, Adjusted R-squared:  0.1111 
## F-statistic: 15.88 on 6 and 708 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.066687 -0.013257  0.000935  0.013897  0.081743 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.1721458  0.0163100  10.555  < 2e-16 ***
## groupCTL     0.0033298  0.0017033   1.955  0.05099 .  
## age         -0.0000502  0.0001791  -0.280  0.77933    
## sexM         0.0026255  0.0017026   1.542  0.12352    
## batch1      -0.0108697  0.0016734  -6.496 1.56e-10 ***
## race         0.0041251  0.0029251   1.410  0.15891    
## pmi         -0.0004393  0.0001403  -3.131  0.00181 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02151 on 708 degrees of freedom
## Multiple R-squared:  0.08521,    Adjusted R-squared:  0.07745 
## F-statistic: 10.99 on 6 and 708 DF,  p-value: 9.917e-12
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0136082 -0.0029819 -0.0000122  0.0028257  0.0164040 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.226e-02  3.440e-03   9.378  < 2e-16 ***
## groupCTL     1.651e-03  3.593e-04   4.595 5.12e-06 ***
## age         -4.307e-05  3.777e-05  -1.140   0.2546    
## sexM        -8.341e-04  3.591e-04  -2.323   0.0205 *  
## batch1       2.264e-03  3.529e-04   6.416 2.56e-10 ***
## race         8.428e-04  6.170e-04   1.366   0.1723    
## pmi         -4.548e-05  2.959e-05  -1.537   0.1248    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004536 on 708 degrees of freedom
## Multiple R-squared:  0.09086,    Adjusted R-squared:  0.08315 
## F-statistic: 11.79 on 6 and 708 DF,  p-value: 1.253e-12
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.016100 -0.003552 -0.000580  0.003322  0.034024 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.766e-02  4.379e-03   6.316 4.73e-10 ***
## groupCTL     6.617e-04  4.574e-04   1.447    0.148    
## age         -5.379e-05  4.809e-05  -1.119    0.264    
## sexM         8.449e-04  4.572e-04   1.848    0.065 .  
## batch1       2.122e-03  4.493e-04   4.723 2.80e-06 ***
## race         2.054e-04  7.854e-04   0.262    0.794    
## pmi         -2.887e-05  3.767e-05  -0.766    0.444    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005775 on 708 degrees of freedom
## Multiple R-squared:  0.0417, Adjusted R-squared:  0.03358 
## F-statistic: 5.134 on 6 and 708 DF,  p-value: 3.54e-05
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.tmp)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0089936 -0.0021805 -0.0000361  0.0020671  0.0142636 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.948e-03  2.569e-03   2.315   0.0209 *  
## groupCTL     2.833e-04  2.683e-04   1.056   0.2914    
## age          2.225e-05  2.821e-05   0.789   0.4306    
## sexM        -6.107e-05  2.682e-04  -0.228   0.8200    
## batch1       4.260e-03  2.636e-04  16.161   <2e-16 ***
## race         9.813e-05  4.608e-04   0.213   0.8314    
## pmi         -4.323e-05  2.210e-05  -1.956   0.0509 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003388 on 708 degrees of freedom
## Multiple R-squared:  0.2709, Adjusted R-squared:  0.2648 
## F-statistic: 43.85 on 6 and 708 DF,  p-value: < 2.2e-16

8 Model: per-cell-type, compare clr-transformed

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group, data = hseq_ctp_pheno.clr.1e3))
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_Oligo, hseq_OPC) ~ group + age + sex + batch + race + pmi, data = hseq_ctp_pheno.clr.1e3))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group, data = methylcc_ctp_pheno.clr.1e3))
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_Oligo_MBP, NonN_OPC) ~ group + age + sex + batch + race + pmi, data = methylcc_ctp_pheno.clr.1e3))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.61199 -0.11511 -0.00003  0.11305  0.59511 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.065603   0.143838   7.408 3.65e-13 ***
## groupCTL    -0.044809   0.015022  -2.983  0.00295 ** 
## age          0.002048   0.001579   1.297  0.19516    
## sexM        -0.043744   0.015016  -2.913  0.00369 ** 
## batch1      -0.131548   0.014758  -8.914  < 2e-16 ***
## race         0.009632   0.025797   0.373  0.70899    
## pmi          0.002068   0.001237   1.671  0.09509 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1897 on 708 degrees of freedom
## Multiple R-squared:  0.1259, Adjusted R-squared:  0.1185 
## F-statistic:    17 on 6 and 708 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35463 -0.06711 -0.01175  0.05923  0.35623 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.8665104  0.0777024  11.152  < 2e-16 ***
## groupCTL    -0.0308171  0.0081149  -3.798 0.000159 ***
## age         -0.0005377  0.0008532  -0.630 0.528752    
## sexM         0.0079539  0.0081116   0.981 0.327147    
## batch1      -0.0200145  0.0079721  -2.511 0.012276 *  
## race         0.0121114  0.0139356   0.869 0.385088    
## pmi          0.0022198  0.0006684   3.321 0.000943 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1025 on 708 degrees of freedom
## Multiple R-squared:  0.04061,    Adjusted R-squared:  0.03248 
## F-statistic: 4.995 on 6 and 708 DF,  p-value: 5.037e-05
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52551 -0.09358 -0.00108  0.09982  0.50140 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.9058485  0.1179217   7.682 5.23e-14 ***
## groupCTL     0.0057554  0.0123152   0.467    0.640    
## age         -0.0005046  0.0012948  -0.390    0.697    
## sexM         0.0115732  0.0123102   0.940    0.347    
## batch1      -0.1590512  0.0120985 -13.146  < 2e-16 ***
## race         0.0166123  0.0211487   0.785    0.432    
## pmi         -0.0013803  0.0010144  -1.361    0.174    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1555 on 708 degrees of freedom
## Multiple R-squared:  0.2045, Adjusted R-squared:  0.1978 
## F-statistic: 30.34 on 6 and 708 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32023 -0.05791  0.00431  0.06553  0.39770 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.846e-01  7.196e-02 -10.903  < 2e-16 ***
## groupCTL     3.906e-02  7.515e-03   5.197 2.66e-07 ***
## age         -1.462e-03  7.902e-04  -1.851  0.06461 .  
## sexM        -3.059e-02  7.512e-03  -4.072 5.19e-05 ***
## batch1      -1.941e-02  7.383e-03  -2.629  0.00876 ** 
## race         1.936e-02  1.291e-02   1.500  0.13403    
## pmi         -2.563e-05  6.191e-04  -0.041  0.96699    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09489 on 708 degrees of freedom
## Multiple R-squared:  0.08082,    Adjusted R-squared:  0.07303 
## F-statistic: 10.38 on 6 and 708 DF,  p-value: 4.868e-11
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69719 -0.08813 -0.00355  0.09737  0.63183 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.9416575  0.1192014  -7.900 1.07e-14 ***
## groupCTL     0.0139691  0.0124489   1.122   0.2622    
## age         -0.0024015  0.0013088  -1.835   0.0670 .  
## sexM         0.0321127  0.0124438   2.581   0.0101 *  
## batch1       0.0020239  0.0122298   0.165   0.8686    
## race        -0.0017069  0.0213782  -0.080   0.9364    
## pmi          0.0005018  0.0010254   0.489   0.6247    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1572 on 708 degrees of freedom
## Multiple R-squared:  0.02193,    Adjusted R-squared:  0.01364 
## F-statistic: 2.646 on 6 and 708 DF,  p-value: 0.0152
## 
## 
## Response hseq_Oligo :
## 
## Call:
## lm(formula = hseq_Oligo ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79621 -0.13513 -0.01171  0.12703  0.80730 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.520e+00  1.568e-01   9.695  < 2e-16 ***
## groupCTL     2.467e-05  1.637e-02   0.002   0.9988    
## age         -9.140e-04  1.721e-03  -0.531   0.5955    
## sexM         1.194e-02  1.636e-02   0.729   0.4660    
## batch1      -1.035e-01  1.608e-02  -6.434 2.29e-10 ***
## race        -5.163e-02  2.811e-02  -1.837   0.0667 .  
## pmi          2.107e-03  1.348e-03   1.563   0.1186    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2067 on 708 degrees of freedom
## Multiple R-squared:  0.06194,    Adjusted R-squared:  0.05399 
## F-statistic: 7.791 on 6 and 708 DF,  p-value: 3.906e-08
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.57065 -0.12977  0.04475  0.19557  0.81437 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.631386   0.251875 -10.447   <2e-16 ***
## groupCTL     0.016821   0.026305   0.639   0.5227    
## age          0.003772   0.002766   1.364   0.1730    
## sexM         0.010758   0.026294   0.409   0.6826    
## batch1       0.431467   0.025842  16.696   <2e-16 ***
## race        -0.004378   0.045173  -0.097   0.9228    
## pmi         -0.005491   0.002167  -2.534   0.0115 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3321 on 708 degrees of freedom
## Multiple R-squared:  0.2863, Adjusted R-squared:  0.2803 
## F-statistic: 47.34 on 6 and 708 DF,  p-value: < 2.2e-16

8.1 Adjust for oligodendrocyte proportion

if (cellnum == 7) {

  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group, data = hseq_ctp_pheno_adjoligo.clr.1e3))
  summary(lm(cbind(hseq_Exc, hseq_Inh, hseq_Astro, hseq_Endo, hseq_Micro, hseq_OPC) ~ group + age + sex + batch + race + pmi, data = hseq_ctp_pheno_adjoligo.clr.1e3))

} else if (cellnum == 9) {
  
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group, data = methylcc_ctp_pheno_adjoligo.clr.1e3))
  summary(lm(cbind(Exc_upper, Exc_lower, Inh_MGE, Inh_CGE, NonN_Astro_FGF3R, NonN_Endo, NonN_Micro, NonN_OPC) ~ group + age + sex + batch + race + pmi, data = methylcc_ctp_pheno_adjoligo.clr.1e3))

}
## Response hseq_Exc :
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55069 -0.11510  0.00097  0.10484  0.70219 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.729978   0.138015  12.535   <2e-16 ***
## groupCTL    -0.033143   0.014414  -2.299   0.0218 *  
## age          0.001445   0.001515   0.954   0.3405    
## sexM        -0.031724   0.014408  -2.202   0.0280 *  
## batch1      -0.154971   0.014160 -10.944   <2e-16 ***
## race        -0.016924   0.024752  -0.684   0.4944    
## pmi          0.002514   0.001187   2.117   0.0346 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.182 on 708 degrees of freedom
## Multiple R-squared:  0.1582, Adjusted R-squared:  0.151 
## F-statistic: 22.17 on 6 and 708 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Inh :
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51681 -0.07513 -0.00952  0.06449  0.40447 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.5308855  0.0824923  18.558  < 2e-16 ***
## groupCTL    -0.0191510  0.0086151  -2.223 0.026534 *  
## age         -0.0011403  0.0009058  -1.259 0.208462    
## sexM         0.0199741  0.0086116   2.319 0.020655 *  
## batch1      -0.0434371  0.0084636  -5.132  3.7e-07 ***
## race        -0.0144444  0.0147946  -0.976 0.329236    
## pmi          0.0026654  0.0007096   3.756 0.000187 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1088 on 708 degrees of freedom
## Multiple R-squared:  0.06434,    Adjusted R-squared:  0.05641 
## F-statistic: 8.114 on 6 and 708 DF,  p-value: 1.696e-08
## 
## 
## Response hseq_Astro :
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57673 -0.10045 -0.00202  0.10473  0.56930 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.536e-01  1.261e-01   7.559 1.26e-13 ***
## groupCTL    -7.155e-05  1.317e-02  -0.005    0.996    
## age         -4.318e-04  1.385e-03  -0.312    0.755    
## sexM         8.547e-03  1.317e-02   0.649    0.517    
## batch1      -1.732e-01  1.294e-02 -13.383  < 2e-16 ***
## race         1.698e-02  2.262e-02   0.751    0.453    
## pmi         -1.076e-03  1.085e-03  -0.992    0.322    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1663 on 708 degrees of freedom
## Multiple R-squared:  0.2073, Adjusted R-squared:  0.2005 
## F-statistic: 30.85 on 6 and 708 DF,  p-value: < 2.2e-16
## 
## 
## Response hseq_Endo :
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33734 -0.05729  0.00388  0.06036  0.46129 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.7368667  0.0712418 -10.343  < 2e-16 ***
## groupCTL     0.0332291  0.0074402   4.466 9.27e-06 ***
## age         -0.0013897  0.0007822  -1.777   0.0761 .  
## sexM        -0.0336160  0.0074371  -4.520 7.25e-06 ***
## batch1      -0.0335642  0.0073093  -4.592 5.20e-06 ***
## race         0.0197309  0.0127769   1.544   0.1230    
## pmi          0.0002783  0.0006129   0.454   0.6499    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09394 on 708 degrees of freedom
## Multiple R-squared:  0.09314,    Adjusted R-squared:  0.08546 
## F-statistic: 12.12 on 6 and 708 DF,  p-value: 5.389e-13
## 
## 
## Response hseq_Micro :
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + sex + batch + race + 
##     pmi, data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63884 -0.08541 -0.00038  0.09224  0.58907 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.8939247  0.1096891  -8.150 1.65e-15 ***
## groupCTL     0.0081422  0.0114555   0.711   0.4775    
## age         -0.0023287  0.0012044  -1.933   0.0536 .  
## sexM         0.0290867  0.0114508   2.540   0.0113 *  
## batch1      -0.0121320  0.0112539  -1.078   0.2814    
## race        -0.0013368  0.0196722  -0.068   0.9458    
## pmi          0.0008058  0.0009436   0.854   0.3934    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1446 on 708 degrees of freedom
## Multiple R-squared:  0.02286,    Adjusted R-squared:  0.01458 
## F-statistic: 2.761 on 6 and 708 DF,  p-value: 0.01167
## 
## 
## Response hseq_OPC :
## 
## Call:
## lm(formula = hseq_OPC ~ group + age + sex + batch + race + pmi, 
##     data = hseq_ctp_pheno_adjoligo.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.50274 -0.11903  0.04151  0.18724  0.75837 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.583653   0.240254 -10.754   <2e-16 ***
## groupCTL     0.010994   0.025091   0.438   0.6614    
## age          0.003845   0.002638   1.458   0.1454    
## sexM         0.007732   0.025081   0.308   0.7580    
## batch1       0.417311   0.024650  16.930   <2e-16 ***
## race        -0.004008   0.043088  -0.093   0.9259    
## pmi         -0.005187   0.002067  -2.510   0.0123 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3168 on 708 degrees of freedom
## Multiple R-squared:  0.2924, Adjusted R-squared:  0.2864 
## F-statistic: 48.76 on 6 and 708 DF,  p-value: < 2.2e-16

8.2 Check per-batch effect for Endo, Exc, Inh

#if (cellnum == 7) {
  hseq_ctp_pheno.clr.1e3$age2 <- hseq_ctp_pheno.clr.1e3$age^2
  summary(lm(hseq_Endo ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>%   filter(batch == 0)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(batch == 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29579 -0.06593  0.00042  0.07388  0.37897 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.300e+00  1.392e+00  -0.934   0.3513  
## groupCTL     1.272e-02  1.376e-02   0.924   0.3563  
## age          1.334e-02  3.385e-02   0.394   0.6938  
## age2        -9.728e-05  2.051e-04  -0.474   0.6357  
## sexM        -2.890e-02  1.381e-02  -2.093   0.0373 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1042 on 259 degrees of freedom
## Multiple R-squared:  0.0328, Adjusted R-squared:  0.01787 
## F-statistic: 2.196 on 4 and 259 DF,  p-value: 0.06985
  summary(lm(hseq_Endo ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>%   filter(batch == 1)))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(batch == 1))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.309532 -0.050033  0.004614  0.059244  0.234283 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.1235107  1.1634108  -0.106 0.915501    
## groupCTL     0.0545639  0.0087948   6.204 1.26e-09 ***
## age         -0.0183923  0.0281175  -0.654 0.513370    
## age2         0.0001061  0.0001692   0.627 0.531092    
## sexM        -0.0328742  0.0088943  -3.696 0.000246 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08875 on 446 degrees of freedom
## Multiple R-squared:  0.1103, Adjusted R-squared:  0.1023 
## F-statistic: 13.82 on 4 and 446 DF,  p-value: 1.232e-10
  summary(lm(hseq_Exc ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>%   filter(batch == 0)))
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(batch == 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62405 -0.12198  0.00683  0.13444  0.61025 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -5.2078059  2.8329846  -1.838   0.0672 .
## groupCTL    -0.0393019  0.0280110  -1.403   0.1618  
## age          0.1554426  0.0688946   2.256   0.0249 *
## age2        -0.0009298  0.0004174  -2.228   0.0268 *
## sexM        -0.0372614  0.0280999  -1.326   0.1860  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.212 on 259 degrees of freedom
## Multiple R-squared:  0.03766,    Adjusted R-squared:  0.0228 
## F-statistic: 2.534 on 4 and 259 DF,  p-value: 0.04076
  summary(lm(hseq_Exc ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>%   filter(batch == 1)))
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(batch == 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45746 -0.11957 -0.00239  0.10316  0.56615 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  2.9022386  2.2956591   1.264   0.2068   
## groupCTL    -0.0460820  0.0173540  -2.655   0.0082 **
## age         -0.0450890  0.0554818  -0.813   0.4168   
## age2         0.0002846  0.0003339   0.853   0.3944   
## sexM        -0.0441951  0.0175503  -2.518   0.0121 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1751 on 446 degrees of freedom
## Multiple R-squared:  0.04693,    Adjusted R-squared:  0.03838 
## F-statistic:  5.49 on 4 and 446 DF,  p-value: 0.0002538
  summary(lm(hseq_Inh ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>%   filter(batch == 0)))
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(batch == 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35142 -0.07523 -0.01818  0.07286  0.35341 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.1403314  1.5425474  -0.739   0.4604  
## groupCTL    -0.0310897  0.0152519  -2.038   0.0425 *
## age          0.0492412  0.0375128   1.313   0.1905  
## age2        -0.0003022  0.0002273  -1.330   0.1848  
## sexM        -0.0002573  0.0153003  -0.017   0.9866  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1155 on 259 degrees of freedom
## Multiple R-squared:  0.02101,    Adjusted R-squared:  0.005893 
## F-statistic:  1.39 on 4 and 259 DF,  p-value: 0.2379
  summary(lm(hseq_Inh ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>%   filter(batch == 1)))
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + age2 + sex, data = hseq_ctp_pheno.clr.1e3 %>% 
##     filter(batch == 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28285 -0.06655 -0.00748  0.06341  0.32875 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  3.2747229  1.2476600   2.625  0.00897 **
## groupCTL    -0.0276583  0.0094316  -2.932  0.00354 **
## age         -0.0587926  0.0301536  -1.950  0.05183 . 
## age2         0.0003512  0.0001814   1.936  0.05353 . 
## sexM         0.0158703  0.0095383   1.664  0.09685 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09517 on 446 degrees of freedom
## Multiple R-squared:  0.03171,    Adjusted R-squared:  0.02302 
## F-statistic: 3.651 on 4 and 446 DF,  p-value: 0.006117
#}

9 Model: per-cell-type, compare ratios between other CTPs and Inh

  • dealing with this directly as a ratio, so no need to do transform (what an alr would do anyway)
hseq_ctp_pheno.tmp.clr0_ratio <- data.frame(hseq_ctp_pheno.tmp.clr0)
hseq_ctp_pheno.tmp.clr0_ratio$exc_inh <- hseq_ctp_pheno.tmp.clr0_ratio$hseq_Exc/hseq_ctp_pheno.tmp.clr0_ratio$hseq_Inh

hseq_ctp_pheno.tmp.clr0_ratio.df <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0_ratio)

summary(lm(exc_inh ~ group, data = hseq_ctp_pheno.tmp.clr0_ratio.df))

summary(lm(exc_inh ~ group + age + sex + batch + race + pmi, data = hseq_ctp_pheno.tmp.clr0_ratio.df))

10 Model: ALDEx2 for differential proportion

  • Documentation: https://www.bioconductor.org/packages/release/bioc/vignettes/ALDEx2/inst/doc/ALDEx2_vignette.pdf
  • Uses Bayesian sampling from a Dirichlet distribution to estimate underlying technical variation
  • Input count data, takes as given (requires integers, so rounded the back-derived count data)
  • –> infers technical variation (eg. sequencing the same sample again) multiple times using aldex.clr()
    • aldex.clr(): recommend mc.samples >128 for t-test, >1000 for rigorous effect size calculations, >16 for ANOVA
  • Outputs (see p10 of documentation):
    • Tables
      • we*: Welch’s t-test
      • wi*: Wilcoxon rank test
      • *ep: expected p-value
      • *eBH: expected BH-correction
      • rab*: median clr value for a given group
      • dif.btw: median difference in clr value between groups
      • dif.win: median of largest difference in clr values within groups
      • effect: median effect size (diff.btw/max(diff.win))
      • overlap: prop of effect size that overlaps 0 (ie. no effect)
    • Plots
      • relationships between Abundance (clr) vs Difference + Dispersion vs Difference
      • red denotes differentially abundant with q < 0.1, grey abundant but not differentially-abundant, black are rare but not differentially abundant)

10.1 methylCC

# Remove NA
foo <- hseq_ctp_pheno.tmp %>% filter(!is.na(age)) %>% filter(!is.na(sex)) %>% filter(!is.na(batch)) %>% filter(!is.na(race)) %>% filter(!is.na(pmi))

10.1.1 No covariates

conds <- foo$group

aldex_in <- sapply(data.frame(t(foo[,c(all_of(level2_cols))])*10000), as.integer)
rownames(aldex_in) <- level2_cols
colnames(aldex_in) <- foo$IID

x <- aldex.clr(aldex_in, conds, mc.samples=128, denom="all", verbose=F)
## operating in serial mode
## computing center with all features
x.tt <- aldex.ttest(x, paired.test = F)

x.effect <- aldex.effect(x, verbose = F)

x.all <- data.frame(x.tt, x.effect)

tmp <- rownames(x.all)
x.all <- as.data.frame(sapply(x.all, function(x) signif(x, 2)))
rownames(x.all) <- tmp

datatable(x.all)
par(mfrow=c(1,2))
aldex.plot(x.all, type="MA", test="welch", xlab="Log-ratio abundance",ylab="Difference")
aldex.plot(x.all, type="MW", test="welch", xlab="Dispersion",ylab="Difference")

10.1.2 glm + covariates

  • Runs a glm
  • Input model matrix
conds <- foo$group
cov.aldex <- foo %>% dplyr::select(c("group", "age", "sex", "batch", "race", "pmi"))
mm <- model.matrix(~., cov.aldex)

x <- aldex.clr(aldex_in, mm, mc.samples=128, denom="all", verbose=F)

x.glm <- aldex.glm(x, mm)
## Warning in if (verbose) message("running tests for each MC instance:"): the
## condition has length > 1 and only the first element will be used
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## |--
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(25%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(50%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -(75%)
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -
## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used

## Warning in if (verbose == TRUE) numTicks <- progress(i, k, numTicks): the
## condition has length > 1 and only the first element will be used
## -|
x.all2 <- data.frame(x.glm)

tmp <- rownames(x.all2)
x.all2 <- as.data.frame(sapply(x.all2, function(x) signif(x, 2)))
rownames(x.all2) <- tmp

datatable(x.all2)

11 Model: ANCOM for differential proportion

11.1 methylCC

11.1.1 Prep

# Prepare data
metadata <- hseq_ctp_pheno.tmp %>% dplyr::select(c("IID", "group", "age", "sex", "batch", "race", "pmi"))
rownames(metadata) <- metadata$IID
metadata$group <- as.factor(metadata$group)

ancom_features <- t(hseq_ctp_pheno.tmp[,c(all_of(level2_cols))])*10000
colnames(ancom_features) <- hseq_ctp_pheno.tmp$IID
ancom_features <- ancom_features[,which(colnames(ancom_features) %in% metadata$IID)]

# Check variables in the correct order
identical(colnames(ancom_features), metadata$IID)
## [1] TRUE
ancom_table <- feature_table_pre_process(ancom_features, metadata, "IID", group_var = NULL, out_cut = 0.05, zero_cut = 0.90, lib_cut = 10, neg_lb)

11.1.2 Case-control analysis with no covariates

# Run tests with covariates (age + sex + diet)
ancom_out_nocov <- ANCOM(ancom_table$feature_table, ancom_table$meta_data, ancom_table$structure_zeros, main_var = "group", p_adj_method = "BH")

ancom_out_nocov$out <- ancom_out_nocov$out[order(ancom_out_nocov$out$W, decreasing = T),]

datatable(ancom_out_nocov$out)
ancom_out_nocov$fig

11.1.3 Case-control analysis with covariates

# Run tests with covariates (age + sex + diet)
ancom_out_cov <- ANCOM(ancom_table$feature_table, ancom_table$meta_data, ancom_table$structure_zeros, main_var = "group", p_adj_method = "BH", adj_formula = "age + sex + batch + race + pmi")

ancom_out_cov$out <- ancom_out_cov$out[order(ancom_out_cov$out$W, decreasing = T),]

datatable(ancom_out_cov$out)
ancom_out_cov$fig

12 Correlation plots

cov_var <- c("IID", "group", "age", "sex", "batch", "Sample_Plate", "race", "pmi")

12.1 Glial

12.1.1 Aggregated

  • eBayes + Houseman (coefs_brain): h_NeuN_neg
  • methylCC (summed): Glial
  • smartSVA: sSV1
  • VAE: VAEe9
  • eBayes + Houseman (summed coefs_sub): h_Glial
    • include methylCC oligo since it seems to go together …
# Select columns
neunn <- ctp_pheno %>% dplyr::select(c(all_of(cov_var), "h_NeuN_neg", "hseq_Glial", "mcc_Glial", "sSV1", "sSV2", "h_Glial", "hseq_Oligo", "hseq_OPC", "mcc_Oligo", "mcc_OPC", "h_OLIGO", "h_OPC", "celfie_Glial", "celfie_Oligo", "celfie_OPC", "VAEe3", "VAEe9"))

# y = h_NeuN_neg
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "h_NeuN_neg"), measure.var = c("hseq_Glial", "mcc_Glial", "h_Glial", "celfie_Glial", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=h_NeuN_neg)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: hseq_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "hseq_Glial"), measure.var = c("h_NeuN_neg", "mcc_Glial", "h_Glial", "celfie_Glial", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=hseq_Glial)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: mcc_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "mcc_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "h_Glial", "celfie_Glial", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=mcc_Glial)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: h_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "h_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "celfie_Glial", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=h_Glial)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: celfie_Glial
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "celfie_Glial"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "h_Glial", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=celfie_Glial)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV2
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "sSV2"), measure.var = c("h_NeuN_neg", "hseq_Oligo", "mcc_Oligo", "h_OLIGO", "celfie_Oligo", "VAEe9"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=sSV2)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV2
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "sSV2"), measure.var = c("h_NeuN_neg", "hseq_Glial", "mcc_Glial", "h_Glial", "celfie_Glial", "hseq_Oligo", "hseq_OPC", "mcc_Oligo", "mcc_OPC", "h_OLIGO", "h_OPC", "celfie_Oligo", "celfie_OPC", "VAEe9"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=sSV2)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: VAEe3
neunn.long <- melt(neunn, id.var = c(all_of(cov_var), "VAEe9"), measure.var = c("h_NeuN_neg", "hseq_Oligo", "mcc_Oligo", "h_OLIGO", "celfie_Oligo", "sSV2"), variable.name = c("method"), value.name = "CTP")
ggplot(neunn.long, aes(x=CTP, y=VAEe9)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = TRUE) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

12.1.2 Compare individual glial sub-type CTPs

# Comparison between houseman extremes vs eBayes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman extremes vs methylCC extremes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman extremes vs CelFIE
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_NeuN_neg", "celfie_Glial", "celfie_Astro", "celfie_Endo", "celfie_Micro", "celfie_Oligo", "celfie_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman array vs methylCC extremes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC", "h_NeuN_neg", "mcc_Glial", "mcc_Astro", "mcc_Endo", "mcc_Micro", "mcc_Oligo", "mcc_OPC")])

ggpairs(ctp_pheno_sub_glial)

# Comparison between houseman array vs methylCC extremes
ctp_pheno_sub_glial <- data.frame(ctp_pheno[,c("h_Glial", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC", "h_NeuN_neg", "mcc_Glial", "mcc_Astro", "mcc_Endo", "mcc_Micro", "mcc_Oligo", "mcc_OPC")])

ggpairs(ctp_pheno_sub_glial)

12.2 Neuronal

12.2.1 Aggregated

# Select columns
neunp <- ctp_pheno %>% dplyr::select(c(all_of(cov_var), "h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV2", "VAEe3"))

# y: h_NeuN_pos
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "h_NeuN_pos"), measure.var = c("hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=h_NeuN_pos)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: hseq_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "hseq_Neuron"), measure.var = c("h_NeuN_pos", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=hseq_Neuron)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: mcc_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "mcc_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "h_Neuron", "celfie_Neuron", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=mcc_Neuron)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: h_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "h_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "celfie_Neuron", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=h_Neuron)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: celfie_Neuron
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "celfie_Neuron"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "sSV2", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=celfie_Neuron)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: sSV2
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "sSV2"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "VAEe3"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=sSV2)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

# y: VAEe3
neunp.long <- melt(neunp, id.var = c(all_of(cov_var), "VAEe3"), measure.var = c("h_NeuN_pos", "hseq_Neuron", "mcc_Neuron", "h_Neuron", "celfie_Neuron", "sSV2"), variable.name = c("method"), value.name = "CTP")
ggplot(neunp.long, aes(x=CTP, y=VAEe3)) + geom_point(aes(colour = Sample_Plate)) + scale_colour_viridis(discrete = T) + geom_smooth(method = "lm") + facet_wrap(~method, scales = "free_x")
## `geom_smooth()` using formula 'y ~ x'

12.2.2 Compare individual neuronal sub-type CTPs

if (cellnum == 7) {
  ctp_pheno_sub_neuron <- data.frame(ctp_pheno[,c("hseq_Neuron", "hseq_Exc", "hseq_Inh", "h_Neuron", "h_GLU", "h_GABA", "mcc_Neuron", "mcc_Exc", "mcc_Inh", "h_NeuN_pos")])

} else if (cellnum == 9) {
  ctp_pheno_sub_neuron <- data.frame(ctp_pheno[,c("Neuron", "Exc_upper", "Exc_lower", "Inh_MGE", "Inh_CGE", "h_NeuN_pos", "h_Neuron", "h_GABA", "h_GLU")])
}

ggpairs(ctp_pheno_sub_neuron)

12.3 All together

ctp_pheno_sub <- data.frame(ctp_pheno[,c("hseq_Exc", "hseq_Inh", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "h_GLU", "h_GABA", "h_ASTRO", "h_ENDO", "h_MONO", "h_OLIGO", "h_OPC")])

#svg(paste(out_dir, "/", filen, "_ctp_hseq_harray_cor_matrix.svg", sep = ""), height = 6, width = 6)
jpeg(paste(out_dir, "/", filen, "_ctp_hseq_harray_cor_matrix.jpeg", sep = ""), height = 600, width = 600, quality = 100)
g <- ggduo(ctp_pheno_sub, colnames(ctp_pheno_sub)[1:7], colnames(ctp_pheno_sub)[8:14], xlab = "Houseman + sequencing reference", ylab = "Houseman + array reference", title = "ROSMAP")
print(g)
## plot: [1,1] [>-------------------------------------------------] 2% est: 0s
## plot: [1,2] [=>------------------------------------------------] 4% est: 1s
## plot: [1,3] [==>-----------------------------------------------] 6% est: 3s
## plot: [1,4] [===>----------------------------------------------] 8% est: 3s
## plot: [1,5] [====>---------------------------------------------] 10% est: 2s
## plot: [1,6] [=====>--------------------------------------------] 12% est: 2s
## plot: [1,7] [======>-------------------------------------------] 14% est: 2s
## plot: [2,1] [=======>------------------------------------------] 16% est: 2s
## plot: [2,2] [========>-----------------------------------------] 18% est: 2s
## plot: [2,3] [=========>----------------------------------------] 20% est: 2s
## plot: [2,4] [==========>---------------------------------------] 22% est: 2s
## plot: [2,5] [===========>--------------------------------------] 24% est: 2s
## plot: [2,6] [============>-------------------------------------] 27% est: 2s
## plot: [2,7] [=============>------------------------------------] 29% est: 2s
## plot: [3,1] [==============>-----------------------------------] 31% est: 2s
## plot: [3,2] [===============>----------------------------------] 33% est: 2s
## plot: [3,3] [================>---------------------------------] 35% est: 2s
## plot: [3,4] [=================>--------------------------------] 37% est: 1s
## plot: [3,5] [==================>-------------------------------] 39% est: 1s
## plot: [3,6] [===================>------------------------------] 41% est: 1s
## plot: [3,7] [====================>-----------------------------] 43% est: 1s
## plot: [4,1] [=====================>----------------------------] 45% est: 1s
## plot: [4,2] [======================>---------------------------] 47% est: 1s
## plot: [4,3] [=======================>--------------------------] 49% est: 1s
## plot: [4,4] [=========================>------------------------] 51% est: 1s
## plot: [4,5] [==========================>-----------------------] 53% est: 1s
## plot: [4,6] [===========================>----------------------] 55% est: 1s
## plot: [4,7] [============================>---------------------] 57% est: 1s
## plot: [5,1] [=============================>--------------------] 59% est: 1s
## plot: [5,2] [==============================>-------------------] 61% est: 1s
## plot: [5,3] [===============================>------------------] 63% est: 1s
## plot: [5,4] [================================>-----------------] 65% est: 1s
## plot: [5,5] [=================================>----------------] 67% est: 1s
## plot: [5,6] [==================================>---------------] 69% est: 1s
## plot: [5,7] [===================================>--------------] 71% est: 1s
## plot: [6,1] [====================================>-------------] 73% est: 1s
## plot: [6,2] [=====================================>------------] 76% est: 1s
## plot: [6,3] [======================================>-----------] 78% est: 1s
## plot: [6,4] [=======================================>----------] 80% est: 0s
## plot: [6,5] [========================================>---------] 82% est: 0s
## plot: [6,6] [=========================================>--------] 84% est: 0s
## plot: [6,7] [==========================================>-------] 86% est: 0s
## plot: [7,1] [===========================================>------] 88% est: 0s
## plot: [7,2] [============================================>-----] 90% est: 0s
## plot: [7,3] [=============================================>----] 92% est: 0s
## plot: [7,4] [==============================================>---] 94% est: 0s
## plot: [7,5] [===============================================>--] 96% est: 0s
## plot: [7,6] [================================================>-] 98% est: 0s
## plot: [7,7] [==================================================]100% est: 0s
dev.off()
## X11cairo 
##        2

12.4 Unsupervised comparison

12.4.1 Neurons

if (cellnum == 7) {
  ctp_pheno_sv_neuron <- data.frame(ctp_pheno[,c("h_NeuN_pos", "hseq_Neuron", "hseq_Exc", "hseq_Inh", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])
} else if (cellnum == 9) {
  ctp_pheno_sv_neuron <- data.frame(ctp_pheno[,c("h_NeuN_pos", "Neuron", "Exc_upper", "Exc_lower", "Inh_MGE", "Inh_CGE", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])
}

ggpairs(ctp_pheno_sv_neuron)

12.4.2 Glial

ctp_pheno_sv_glial <- data.frame(ctp_pheno[,c("h_NeuN_neg", "hseq_Glial", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC", "sSV1", "sSV2", "sSV3", "sSV4", "sSV5", "sSV6", "sSV7", "sSV8")])

ggpairs(ctp_pheno_sv_glial)

13 Comparison to IHC cell proportions

13.1 Munge data

13.1.1 no clr, no adjoligo

ihc_dir <- "~/shared-gandalm/GenomicDatasets/ROSMAP/ROSMAP_IHC"

ihc_neuro <- fread(paste(ihc_dir, "/IHC.neuro.txt", sep = ""), header = T)
ihc_astro <- fread(paste(ihc_dir, "/IHC.astro.txt", sep = ""), header = T)
ihc_endo <- fread(paste(ihc_dir, "/IHC.endo.txt", sep = ""), header = T)
ihc_micro <- fread(paste(ihc_dir, "/IHC.microglia.txt", sep = ""), header = T)
ihc_oligo <- fread(paste(ihc_dir, "/IHC.oligo.txt", sep = ""), header = T)

ihc <- data.frame(projid = colnames(ihc_neuro), neuro_ihc = t(ihc_neuro[1,])) %>%
  inner_join(data.frame(projid = colnames(ihc_astro), astro_ihc = t(ihc_astro[1,])), by = "projid") %>% 
  inner_join(data.frame(projid = colnames(ihc_endo), endo_ihc = t(ihc_endo[1,])), by = "projid") %>% 
  inner_join(data.frame(projid = colnames(ihc_micro), micro_ihc = t(ihc_micro[1,])), by = "projid") %>% 
  inner_join(data.frame(projid = colnames(ihc_oligo), oligo_ihc = t(ihc_oligo[1,])), by = "projid")
ihc$projid <- as.integer(ihc$projid)
ihc <- as.matrix(ihc)
ihc[which(is.na(ihc))] <- 0
ihc <- data.frame(ihc)

ctp_pheno_ihc <- inner_join(ctp_pheno, ihc, by = "projid")

ctp_pheno_ihc_hseq <- ctp_pheno_ihc[,c("IID", level2_cols, "neuro_ihc", "astro_ihc", "endo_ihc", "micro_ihc", "oligo_ihc")]

13.1.2 no clr, adjoligo

# Make neuron aggregated variable for comparison to IHC
# - "transfer" all oligodendrocyte fraction to the Neuron fraction
ctp_pheno_ihc_hseq_rmoligo <- ctp_pheno_ihc_hseq %>% 
  mutate(hseq_Neuron = hseq_Exc + hseq_Inh + hseq_Oligo) %>%
  mutate(neuro_ihc = neuro_ihc + oligo_ihc) %>% 
  dplyr::select(-c("hseq_Oligo", "oligo_ihc"))

13.1.3 clr, no adjoligo

# clr transform
ihc.clr.1e3.tmp0 <- as.matrix(ihc[,c("neuro_ihc", "astro_ihc", "endo_ihc", "micro_ihc", "oligo_ihc")])
ihc.clr.1e3.tmp0[which(ihc.clr.1e3.tmp0 < 1e-3)] <- 1e-3
ihc.clr.1e3.tmp <- clr(ihc.clr.1e3.tmp0)
ihc.clr.1e3 <- data.frame(projid = ihc$projid, ihc.clr.1e3.tmp)

# Redo the hseq clr-transform with Neuron sum
hseq_ctp_pheno.tmp.clr <- as.matrix(hseq_ctp_pheno.tmp %>% dplyr::select(c("hseq_Neuron", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC")))
length(which(hseq_ctp_pheno.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 4
hseq_ctp_pheno.tmp.clr0 <- hseq_ctp_pheno.tmp.clr
hseq_ctp_pheno.tmp.clr0[which(hseq_ctp_pheno.tmp.clr0 <= 1e-3)] <- 1e-3
hseq_ctp_pheno.tmp.clr0.tr <- clr(hseq_ctp_pheno.tmp.clr0)
#hseq_ctp_pheno.clr.1e3 <- data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr)

hseq_ctp_pheno.clr.1e3.tmp <- inner_join(data.frame(keep_pheno_col, hseq_ctp_pheno.tmp.clr0.tr), ctp_pheno[,c("IID", "projid")], by = "IID")
ctp_pheno_ihc.clr.1e3 <- inner_join(hseq_ctp_pheno.clr.1e3.tmp, ihc.clr.1e3, by = "projid")

ctp_pheno_ihc_hseq.clr.1e3 <- ctp_pheno_ihc.clr.1e3[,c("IID", all_of(c("hseq_Neuron", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_Oligo", "hseq_OPC")), "neuro_ihc", "astro_ihc", "endo_ihc", "micro_ihc", "oligo_ihc")]

13.1.4 clr, adjoligo

ihc_rmoligo <- ihc %>% 
  mutate(neuro_ihc = neuro_ihc + oligo_ihc) %>%
  dplyr::select(-c("oligo_ihc"))

# clr transform
ihc_rmoligo.clr.1e3.tmp0 <- as.matrix(ihc[,c("neuro_ihc", "astro_ihc", "endo_ihc", "micro_ihc")])
ihc_rmoligo.clr.1e3.tmp0[which(ihc_rmoligo.clr.1e3.tmp0 < 1e-3)] <- 1e-3
ihc_rmoligo.clr.1e3.tmp <- clr(ihc_rmoligo.clr.1e3.tmp0)
ihc_rmoligo.clr.1e3 <- data.frame(projid = ihc$projid, ihc_rmoligo.clr.1e3.tmp)

# Redo the hseq clr-transform with Neuron sum
hseq_ctp_pheno_adjoligo.tmp.clr <- as.matrix(hseq_ctp_pheno_adjoligo.tmp %>% dplyr::select(c("hseq_Neuron", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_OPC")))
length(which(hseq_ctp_pheno_adjoligo.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 4
hseq_ctp_pheno_adjoligo.tmp.clr0 <- hseq_ctp_pheno_adjoligo.tmp.clr
hseq_ctp_pheno_adjoligo.tmp.clr0[which(hseq_ctp_pheno_adjoligo.tmp.clr0 <= 1e-3)] <- 1e-3
hseq_ctp_pheno_adjoligo.tmp.clr0.tr <- clr(hseq_ctp_pheno_adjoligo.tmp.clr0)

hseq_ctp_pheno_adjoligo.clr.1e3.tmp <- inner_join(data.frame(keep_pheno_col, hseq_ctp_pheno_adjoligo.tmp.clr0.tr), ctp_pheno[,c("IID", "projid")], by = "IID")

ctp_pheno_ihc_rmoligo.clr.1e3 <- inner_join(hseq_ctp_pheno_adjoligo.clr.1e3.tmp, ihc_rmoligo.clr.1e3, by = "projid")

ctp_pheno_ihc_rmoligo_hseq.clr.1e3 <- ctp_pheno_ihc_rmoligo.clr.1e3[,c("IID", all_of(c("hseq_Neuron", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_OPC")), "neuro_ihc", "astro_ihc", "endo_ihc", "micro_ihc")]

13.2 Write output

if (write_out == TRUE) {
  write.table(ctp_pheno_ihc, paste(out_dir, "/", filen, "_ctp_pheno_ihc.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
  write.table(ctp_pheno_ihc_hseq, paste(out_dir, "/", filen, "_ctp_pheno_hseq_ihc.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
  write.table(ctp_pheno_ihc_hseq_rmoligo, paste(out_dir, "/", filen, "_ctp_pheno_hseq_ihc_rmoligo.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
  write.table(ctp_pheno_ihc_hseq.clr.1e3, paste(out_dir, "/", filen, "_ctp_pheno_hseq_ihc_clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
  write.table(ctp_pheno_ihc_rmoligo_hseq.clr.1e3, paste(out_dir, "/", filen, "_ctp_pheno_hseq_ihc_rmoligo_clr.1e3.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
}

13.3 Correlation

13.3.1 no adjoligo, no clr

print("Neurons")
## [1] "Neurons"
cor.test(ctp_pheno_ihc$neuro_ihc, ctp_pheno_ihc$hseq_Neuron)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$neuro_ihc and ctp_pheno_ihc$hseq_Neuron
## t = -0.39395, df = 47, p-value = 0.6954
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3331899  0.2274983
## sample estimates:
##        cor 
## -0.0573682
cor.test(ctp_pheno_ihc$neuro_ihc, ctp_pheno_ihc$mcc_Neuron)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$neuro_ihc and ctp_pheno_ihc$mcc_Neuron
## t = 0.84458, df = 47, p-value = 0.4026
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1645857  0.3900554
## sample estimates:
##       cor 
## 0.1222697
cor.test(ctp_pheno_ihc$neuro_ihc, ctp_pheno_ihc$h_Neuron)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$neuro_ihc and ctp_pheno_ihc$h_Neuron
## t = -0.16098, df = 47, p-value = 0.8728
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3026729  0.2594349
## sample estimates:
##         cor 
## -0.02347421
print("Astrocytes")
## [1] "Astrocytes"
cor.test(ctp_pheno_ihc$astro_ihc, ctp_pheno_ihc$hseq_Astro)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$astro_ihc and ctp_pheno_ihc$hseq_Astro
## t = 1.7801, df = 47, p-value = 0.08152
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03214714  0.49736851
## sample estimates:
##       cor 
## 0.2513213
cor.test(ctp_pheno_ihc$astro_ihc, ctp_pheno_ihc$mcc_Astro)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$astro_ihc and ctp_pheno_ihc$mcc_Astro
## t = 1.9765, df = 47, p-value = 0.05398
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.004531529  0.517874043
## sample estimates:
##      cor 
## 0.277018
cor.test(ctp_pheno_ihc$astro_ihc, ctp_pheno_ihc$h_ASTRO)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$astro_ihc and ctp_pheno_ihc$h_ASTRO
## t = 0.6339, df = 47, p-value = 0.5292
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1941526  0.3638472
## sample estimates:
##        cor 
## 0.09207058
print("Endothelial")
## [1] "Endothelial"
cor.test(ctp_pheno_ihc$endo_ihc, ctp_pheno_ihc$hseq_Endo)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$endo_ihc and ctp_pheno_ihc$hseq_Endo
## t = 1.4661, df = 47, p-value = 0.1493
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07657583  0.46308851
## sample estimates:
##       cor 
## 0.2091238
cor.test(ctp_pheno_ihc$endo_ihc, ctp_pheno_ihc$mcc_Endo)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$endo_ihc and ctp_pheno_ihc$mcc_Endo
## t = 0.97835, df = 47, p-value = 0.3329
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1457092  0.4063304
## sample estimates:
##       cor 
## 0.1412758
cor.test(ctp_pheno_ihc$endo_ihc, ctp_pheno_ihc$h_ENDO)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$endo_ihc and ctp_pheno_ihc$h_ENDO
## t = 1.1408, df = 47, p-value = 0.2598
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1227221  0.4256877
## sample estimates:
##       cor 
## 0.1641388
print("Microglia")
## [1] "Microglia"
cor.test(ctp_pheno_ihc$micro_ihc, ctp_pheno_ihc$hseq_Micro)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$micro_ihc and ctp_pheno_ihc$hseq_Micro
## t = 2.3986, df = 47, p-value = 0.02048
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.05407108 0.55948672
## sample estimates:
##       cor 
## 0.3302464
cor.test(ctp_pheno_ihc$micro_ihc, ctp_pheno_ihc$mcc_Micro)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$micro_ihc and ctp_pheno_ihc$mcc_Micro
## t = 1.0132, df = 47, p-value = 0.3161
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1407753  0.4105274
## sample estimates:
##      cor 
## 0.146209
cor.test(ctp_pheno_ihc$micro_ihc, ctp_pheno_ihc$h_MONO)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$micro_ihc and ctp_pheno_ihc$h_MONO
## t = 2.0768, df = 47, p-value = 0.04331
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.009502971 0.528070036
## sample estimates:
##       cor 
## 0.2899248
print("Oligodendrocytes/OPCs")
## [1] "Oligodendrocytes/OPCs"
cor.test(ctp_pheno_ihc$oligo_ihc, ctp_pheno_ihc$hseq_Oligo)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$oligo_ihc and ctp_pheno_ihc$hseq_Oligo
## t = 0.63397, df = 47, p-value = 0.5292
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1941425  0.3638563
## sample estimates:
##        cor 
## 0.09208094
cor.test(ctp_pheno_ihc$oligo_ihc, ctp_pheno_ihc$mcc_Oligo)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$oligo_ihc and ctp_pheno_ihc$mcc_Oligo
## t = 0.44019, df = 47, p-value = 0.6618
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2211046  0.3391614
## sample estimates:
##        cor 
## 0.06407582
cor.test(ctp_pheno_ihc$oligo_ihc, ctp_pheno_ihc$h_OLIGO)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$oligo_ihc and ctp_pheno_ihc$h_OLIGO
## t = -0.47013, df = 47, p-value = 0.6404
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3430129  0.2169551
## sample estimates:
##         cor 
## -0.06841526
cor.test(ctp_pheno_ihc$oligo_ihc, ctp_pheno_ihc$hseq_OPC)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$oligo_ihc and ctp_pheno_ihc$hseq_OPC
## t = -0.40909, df = 47, p-value = 0.6843
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3351492  0.2254058
## sample estimates:
##         cor 
## -0.05956635
cor.test(ctp_pheno_ihc$oligo_ihc, ctp_pheno_ihc$mcc_OPC)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$oligo_ihc and ctp_pheno_ihc$mcc_OPC
## t = -0.7638, df = 47, p-value = 0.4488
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3800883  0.1759488
## sample estimates:
##        cor 
## -0.1107261
cor.test(ctp_pheno_ihc$oligo_ihc, ctp_pheno_ihc$h_OPC)
## 
##  Pearson's product-moment correlation
## 
## data:  ctp_pheno_ihc$oligo_ihc and ctp_pheno_ihc$h_OPC
## t = 0.52129, df = 47, p-value = 0.6046
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2098519  0.3495627
## sample estimates:
##        cor 
## 0.07581873
adj <- "noadjoligo"
transf <- "noclr"

ctp_ihc.tmp <- ctp_pheno_ihc

neuron_cor <- cor.test(ctp_ihc.tmp$neuro_ihc, ctp_ihc.tmp$hseq_Neuron)
astro_cor <- cor.test(ctp_ihc.tmp$astro_ihc, ctp_ihc.tmp$hseq_Astro)
endo_cor <- cor.test(ctp_ihc.tmp$endo_ihc, ctp_ihc.tmp$hseq_Endo)
micro_cor <- cor.test(ctp_ihc.tmp$micro_ihc, ctp_ihc.tmp$hseq_Micro)
oligo_cor <- cor.test(ctp_ihc.tmp$oligo_ihc, ctp_ihc.tmp$hseq_Oligo)

neuro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Neuron", "mcc_Neuron", "h_Neuron", "h_NeuN_pos"), variable.name = "method", value.name = "CTP")
astro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Astro", "mcc_Astro", "h_ASTRO", "h_NeuN_neg"), variable.name = "method", value.name = "CTP")
endo_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Endo", "mcc_Endo", "h_ENDO", "h_NeuN_neg"), variable.name = "method", value.name = "CTP")
micro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Micro", "mcc_Micro", "h_MONO", "h_NeuN_neg"), variable.name = "method", value.name = "CTP")
oligo_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Oligo", "mcc_Oligo", "h_OLIGO", "h_NeuN_neg"), variable.name = "method", value.name = "CTP")

ggplot(neuro_ihc.long, aes(x = neuro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ method, scales = "free_y")
## `geom_smooth()` using formula 'y ~ x'

ggplot(astro_ihc.long, aes(x = astro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ method, scales = "free_y")
## `geom_smooth()` using formula 'y ~ x'

ggplot(endo_ihc.long, aes(x = endo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ method, scales = "free_y")
## `geom_smooth()` using formula 'y ~ x'

ggplot(micro_ihc.long, aes(x = micro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ method, scales = "free_y")
## `geom_smooth()` using formula 'y ~ x'

ggplot(oligo_ihc.long, aes(x = oligo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + facet_wrap(~ method, scales = "free_y")
## `geom_smooth()` using formula 'y ~ x'

# Put hseq correlations into 1 plot
neuro_ihc.gg <- ggplot(neuro_ihc.long %>% filter(method == "hseq_Neuron"), aes(x = neuro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Neuron") + ggtitle(paste("r = ", signif(neuron_cor$estimate, 2), ", p = ", signif(neuron_cor$p.value, 2), sep = ""))
astro_ihc.gg <- ggplot(astro_ihc.long %>% filter(method == "hseq_Astro"), aes(x = astro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Astro") + ggtitle(paste("r = ", signif(astro_cor$estimate, 2), ", p = ", signif(astro_cor$p.value, 2), sep = ""))
endo_ihc.gg <- ggplot(endo_ihc.long %>% filter(method == "hseq_Endo"), aes(x = endo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Endo") + ggtitle(paste("r = ", signif(endo_cor$estimate, 2), ", p = ", signif(endo_cor$p.value, 2), sep = ""))
micro_ihc.gg <- ggplot(micro_ihc.long %>% filter(method == "hseq_Micro"), aes(x = micro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Micro") + ggtitle(paste("r = ", signif(micro_cor$estimate, 2), ", p = ", signif(micro_cor$p.value, 2), sep = ""))
oligo_ihc.gg <- ggplot(oligo_ihc.long %>% filter(method == "hseq_Oligo"), aes(x = oligo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Oligo") + ggtitle(paste("r = ", signif(oligo_cor$estimate, 2), ", p = ", signif(oligo_cor$p.value, 2), sep = ""))

ggarrange(neuro_ihc.gg, astro_ihc.gg, endo_ihc.gg, micro_ihc.gg, oligo_ihc.gg, ncol = 3, nrow = 2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/ctp_assoc_ihc_", adj, "_", transf, ".png", sep = ""), bg = "transparent", height = 7, width = 7)

13.3.2 adjoligo, noclr

adj <- "adjoligo"
transf <- "noclr"

ctp_ihc.tmp <- ctp_pheno_ihc_hseq_rmoligo

neuron_cor <- cor.test(ctp_ihc.tmp$neuro_ihc, ctp_ihc.tmp$hseq_Neuron)
astro_cor <- cor.test(ctp_ihc.tmp$astro_ihc, ctp_ihc.tmp$hseq_Astro)
endo_cor <- cor.test(ctp_ihc.tmp$endo_ihc, ctp_ihc.tmp$hseq_Endo)
micro_cor <- cor.test(ctp_ihc.tmp$micro_ihc, ctp_ihc.tmp$hseq_Micro)

neuro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Neuron"), variable.name = "method", value.name = "CTP")
astro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Astro"), variable.name = "method", value.name = "CTP")
endo_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Endo"), variable.name = "method", value.name = "CTP")
micro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Micro"), variable.name = "method", value.name = "CTP")

# Put hseq correlations into 1 plot
neuro_ihc.gg <- ggplot(neuro_ihc.long %>% filter(method == "hseq_Neuron"), aes(x = neuro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Neuron") + ggtitle(paste("r = ", signif(neuron_cor$estimate, 2), ", p = ", signif(neuron_cor$p.value, 2), sep = ""))
astro_ihc.gg <- ggplot(astro_ihc.long %>% filter(method == "hseq_Astro"), aes(x = astro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Astro") + ggtitle(paste("r = ", signif(astro_cor$estimate, 2), ", p = ", signif(astro_cor$p.value, 2), sep = ""))
endo_ihc.gg <- ggplot(endo_ihc.long %>% filter(method == "hseq_Endo"), aes(x = endo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Endo") + ggtitle(paste("r = ", signif(endo_cor$estimate, 2), ", p = ", signif(endo_cor$p.value, 2), sep = ""))
micro_ihc.gg <- ggplot(micro_ihc.long %>% filter(method == "hseq_Micro"), aes(x = micro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Micro") + ggtitle(paste("r = ", signif(micro_cor$estimate, 2), ", p = ", signif(micro_cor$p.value, 2), sep = ""))

ggarrange(neuro_ihc.gg, astro_ihc.gg, endo_ihc.gg, micro_ihc.gg)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/ctp_assoc_ihc_", adj, "_", transf, ".png", sep = ""), bg = "transparent", height = 7, width = 7)

13.3.3 noadjoligo, clr

adj <- "noadjoligo"
transf <- "clr"

ctp_ihc.tmp <- ctp_pheno_ihc_hseq.clr.1e3

neuron_cor <- cor.test(ctp_ihc.tmp$neuro_ihc, ctp_ihc.tmp$hseq_Neuron)
astro_cor <- cor.test(ctp_ihc.tmp$astro_ihc, ctp_ihc.tmp$hseq_Astro)
endo_cor <- cor.test(ctp_ihc.tmp$endo_ihc, ctp_ihc.tmp$hseq_Endo)
micro_cor <- cor.test(ctp_ihc.tmp$micro_ihc, ctp_ihc.tmp$hseq_Micro)
oligo_cor <- cor.test(ctp_ihc.tmp$oligo_ihc, ctp_ihc.tmp$hseq_Oligo)

neuro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Neuron"), variable.name = "method", value.name = "CTP")
astro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Astro"), variable.name = "method", value.name = "CTP")
endo_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Endo"), variable.name = "method", value.name = "CTP")
micro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Micro"), variable.name = "method", value.name = "CTP")
oligo_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Oligo"), variable.name = "method", value.name = "CTP")

# Put hseq correlations into 1 plot
neuro_ihc.gg <- ggplot(neuro_ihc.long %>% filter(method == "hseq_Neuron"), aes(x = neuro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Neuron") + ggtitle(paste("r = ", signif(neuron_cor$estimate, 2), ", p = ", signif(neuron_cor$p.value, 2), sep = ""))
astro_ihc.gg <- ggplot(astro_ihc.long %>% filter(method == "hseq_Astro"), aes(x = astro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Astro") + ggtitle(paste("r = ", signif(astro_cor$estimate, 2), ", p = ", signif(astro_cor$p.value, 2), sep = ""))
endo_ihc.gg <- ggplot(endo_ihc.long %>% filter(method == "hseq_Endo"), aes(x = endo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Endo") + ggtitle(paste("r = ", signif(endo_cor$estimate, 2), ", p = ", signif(endo_cor$p.value, 2), sep = ""))
micro_ihc.gg <- ggplot(micro_ihc.long %>% filter(method == "hseq_Micro"), aes(x = micro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Micro") + ggtitle(paste("r = ", signif(micro_cor$estimate, 2), ", p = ", signif(micro_cor$p.value, 2), sep = ""))
oligo_ihc.gg <- ggplot(oligo_ihc.long %>% filter(method == "hseq_Oligo"), aes(x = oligo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Oligo") + ggtitle(paste("r = ", signif(oligo_cor$estimate, 2), ", p = ", signif(oligo_cor$p.value, 2), sep = ""))

ggarrange(neuro_ihc.gg, astro_ihc.gg, endo_ihc.gg, micro_ihc.gg, oligo_ihc.gg, ncol = 3, nrow = 2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/ctp_assoc_ihc_", adj, "_", transf, ".png", sep = ""), bg = "transparent", height = 7, width = 7)

13.3.4 adjoligo, clr

adj <- "adjoligo"
transf <- "clr"

ctp_ihc.tmp <- ctp_pheno_ihc_rmoligo_hseq.clr.1e3

neuron_cor <- cor.test(ctp_ihc.tmp$neuro_ihc, ctp_ihc.tmp$hseq_Neuron)
astro_cor <- cor.test(ctp_ihc.tmp$astro_ihc, ctp_ihc.tmp$hseq_Astro)
endo_cor <- cor.test(ctp_ihc.tmp$endo_ihc, ctp_ihc.tmp$hseq_Endo)
micro_cor <- cor.test(ctp_ihc.tmp$micro_ihc, ctp_ihc.tmp$hseq_Micro)

neuro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Neuron"), variable.name = "method", value.name = "CTP")
astro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Astro"), variable.name = "method", value.name = "CTP")
endo_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Endo"), variable.name = "method", value.name = "CTP")
micro_ihc.long <- melt(ctp_ihc.tmp, measure.vars = c("hseq_Micro"), variable.name = "method", value.name = "CTP")

# Put hseq correlations into 1 plot
neuro_ihc.gg <- ggplot(neuro_ihc.long %>% filter(method == "hseq_Neuron"), aes(x = neuro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Neuron") + ggtitle(paste("r = ", signif(neuron_cor$estimate, 2), ", p = ", signif(neuron_cor$p.value, 2), sep = ""))
astro_ihc.gg <- ggplot(astro_ihc.long %>% filter(method == "hseq_Astro"), aes(x = astro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Astro") + ggtitle(paste("r = ", signif(astro_cor$estimate, 2), ", p = ", signif(astro_cor$p.value, 2), sep = ""))
endo_ihc.gg <- ggplot(endo_ihc.long %>% filter(method == "hseq_Endo"), aes(x = endo_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Endo") + ggtitle(paste("r = ", signif(endo_cor$estimate, 2), ", p = ", signif(endo_cor$p.value, 2), sep = ""))
micro_ihc.gg <- ggplot(micro_ihc.long %>% filter(method == "hseq_Micro"), aes(x = micro_ihc, y = CTP)) + geom_point() + geom_smooth(method = "lm") + ylab("Micro") + ggtitle(paste("r = ", signif(micro_cor$estimate, 2), ", p = ", signif(micro_cor$p.value, 2), sep = ""))

ggarrange(neuro_ihc.gg, astro_ihc.gg, endo_ihc.gg, micro_ihc.gg)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/ctp_assoc_ihc_", adj, "_", transf, ".png", sep = ""), bg = "transparent", height = 7, width = 7)

13.4 Adjust for oligodendrocyte fraction

  • remove the oligodendrocyte proportion, then scale the others to 100%

13.4.1 Munge

#ctp_pheno_ihc_mcc_rmoligo <- ctp_pheno_ihc_mcc %>% dplyr::select(-c("NonN_Oligo_MBP"))

# Long format for deconvolved proportions
ctp_pheno_ihc_hseq_rmoligo.long <- melt(ctp_pheno_ihc_hseq_rmoligo, measure.vars = c("hseq_Neuron", "hseq_Exc", "hseq_Inh", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_OPC"), variable.name = "celltype", value.name = "CTP_adjoligo")

# Long format for IHC proportions
ctp_pheno_ihc_hseq_rmoligo.long <- melt(ctp_pheno_ihc_hseq_rmoligo.long, measure.vars = c("neuro_ihc", "astro_ihc", "endo_ihc", "micro_ihc"), variable.name = "celltype_ihc", value.name = "CTP_ihc_adjoligo")

# Only take matching data
ctp_pheno_ihc_hseq_rmoligo.long <- ctp_pheno_ihc_hseq_rmoligo.long %>% filter((celltype == "hseq_Neuron" & celltype_ihc == "neuro_ihc") | (celltype == "hseq_Astro" & celltype_ihc == "astro_ihc") | (celltype == "hseq_Endo" & celltype_ihc == "endo_ihc") | (celltype == "hseq_Micro" & celltype_ihc == "micro_ihc"))

13.4.2 Plots

neuro_ihc_rmoligo <- ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "neuro_ihc") %>% dplyr::select(c("CTP_adjoligo", "CTP_ihc_adjoligo"))
cor.test(neuro_ihc_rmoligo$CTP_ihc_adjoligo, neuro_ihc_rmoligo$CTP_adjoligo)

astro_ihc_rmoligo <- ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "astro_ihc") %>% dplyr::select(c("CTP_adjoligo", "CTP_ihc_adjoligo"))
cor.test(astro_ihc_rmoligo$CTP_ihc_adjoligo, astro_ihc_rmoligo$CTP_adjoligo)

micro_ihc_rmoligo <- ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "micro_ihc") %>% dplyr::select(c("CTP_adjoligo", "CTP_ihc_adjoligo"))
cor.test(micro_ihc_rmoligo$CTP_ihc_adjoligo, micro_ihc_rmoligo$CTP_adjoligo)

endo_ihc_rmoligo <- ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "endo_ihc") %>% dplyr::select(c("CTP_adjoligo", "CTP_ihc_adjoligo"))
cor.test(endo_ihc_rmoligo$CTP_ihc_adjoligo, endo_ihc_rmoligo$CTP_adjoligo)

neuro_adjoligo.gg <- ggplot(ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "neuro_ihc"), aes(x = CTP_ihc_adjoligo, y = CTP_adjoligo)) + geom_point() + geom_smooth(method = "lm") + ggtitle("neurons, adjusting for oligos")

astro_adjoligo.gg <- ggplot(ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "astro_ihc"), aes(x = CTP_ihc_adjoligo, y = CTP_adjoligo)) + geom_point() + geom_smooth(method = "lm") + ggtitle("astrocytes, adjusting for oligos")

micro_adjoligo.gg <- ggplot(ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "micro_ihc"), aes(x = CTP_ihc_adjoligo, y = CTP_adjoligo)) + geom_point() + geom_smooth(method = "lm") + ggtitle("microglia, adjusting for oligos")

endo_adjoligo.gg <- ggplot(ctp_pheno_ihc_hseq_rmoligo.long %>% filter(celltype_ihc == "endo_ihc"), aes(x = CTP_ihc_adjoligo, y = CTP_adjoligo)) + geom_point() + geom_smooth(method = "lm") + ggtitle("endothelial, adjusting for oligos")

ggarrange(neuro_adjoligo.gg, astro_adjoligo.gg, micro_adjoligo.gg, endo_adjoligo.gg, nrow = 2, ncol = 2)

13.5 Check differential abundance of IHC cell-types in this subset

13.5.1 Munge

ctp_pheno_ihc_hseq_rmoligo_pheno <- inner_join(ctp_pheno_ihc_hseq_rmoligo, pheno, by = "IID")

table(ctp_pheno_ihc_hseq_rmoligo_pheno$group)
## 
## AZD CTL 
##  18  31
keep_pheno_col <- ctp_pheno_ihc_hseq_rmoligo_pheno %>% dplyr::select(all_of(keep_cols))
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr <- as.matrix(ctp_pheno_ihc_hseq_rmoligo_pheno %>% dplyr::select(c("neuro_ihc", "astro_ihc", "endo_ihc", "micro_ihc")))

# Make minimum CTP = 1e-3
length(which(ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 0
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0 <- ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0[which(ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0.tr <- clr(ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0)
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3 <- data.frame(keep_pheno_col, ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0.tr)

13.5.2 Linear models of deconvolved cell-types with clr-transformation

  • leave out sex as a covariate, as all n=49 samples here with IHC were male
summary(lm(neuro_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = neuro_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44023 -0.10196  0.03129  0.08372  0.34713 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.07393    0.03902  27.526   <2e-16 ***
## groupCTL     0.05423    0.04905   1.105    0.275    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1655 on 47 degrees of freedom
## Multiple R-squared:  0.02534,    Adjusted R-squared:  0.004606 
## F-statistic: 1.222 on 1 and 47 DF,  p-value: 0.2746
summary(lm(astro_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = astro_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32685 -0.14919  0.01582  0.15250  0.32810 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.11217    0.04211  -2.664   0.0106 *
## groupCTL    -0.11212    0.05295  -2.118   0.0395 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1787 on 47 degrees of freedom
## Multiple R-squared:  0.08709,    Adjusted R-squared:  0.06767 
## F-statistic: 4.484 on 1 and 47 DF,  p-value: 0.03953
summary(lm(endo_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = endo_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23531 -0.13337 -0.00206  0.09637  0.42833 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.17217    0.03708  -4.643 2.78e-05 ***
## groupCTL     0.05204    0.04662   1.116     0.27    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1573 on 47 degrees of freedom
## Multiple R-squared:  0.02582,    Adjusted R-squared:  0.005095 
## F-statistic: 1.246 on 1 and 47 DF,  p-value: 0.27
summary(lm(micro_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = micro_ihc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48884 -0.15464  0.00626  0.13685  0.54099 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.789579   0.052818 -14.949   <2e-16 ***
## groupCTL     0.005854   0.066404   0.088     0.93    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2241 on 47 degrees of freedom
## Multiple R-squared:  0.0001653,  Adjusted R-squared:  -0.02111 
## F-statistic: 0.007771 on 1 and 47 DF,  p-value: 0.9301
summary(lm(neuro_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = neuro_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41434 -0.10129  0.03912  0.10099  0.31929 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.620323   0.490546   1.265    0.213
## groupCTL     0.083826   0.053849   1.557    0.127
## age          0.004322   0.005358   0.807    0.424
## batch1      -0.012311   0.050227  -0.245    0.808
## race         0.101193   0.103188   0.981    0.332
## pmi         -0.005057   0.005537  -0.913    0.366
## 
## Residual standard error: 0.1684 on 43 degrees of freedom
## Multiple R-squared:  0.07748,    Adjusted R-squared:  -0.02979 
## F-statistic: 0.7223 on 5 and 43 DF,  p-value: 0.6104
summary(lm(astro_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = astro_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30858 -0.14951  0.01124  0.14116  0.34214 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -8.137e-02  5.399e-01  -0.151   0.8809  
## groupCTL    -1.144e-01  5.927e-02  -1.930   0.0603 .
## age          1.784e-05  5.897e-03   0.003   0.9976  
## batch1      -4.490e-02  5.529e-02  -0.812   0.4212  
## race        -5.564e-03  1.136e-01  -0.049   0.9612  
## pmi          2.467e-04  6.095e-03   0.040   0.9679  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1853 on 43 degrees of freedom
## Multiple R-squared:  0.1016, Adjusted R-squared:  -0.002912 
## F-statistic: 0.9721 on 5 and 43 DF,  p-value: 0.4456
summary(lm(endo_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = endo_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26012 -0.11277  0.00086  0.08934  0.43386 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.130535   0.473790   0.276    0.784
## groupCTL     0.051419   0.052009   0.989    0.328
## age         -0.003147   0.005175  -0.608    0.546
## batch1      -0.004344   0.048512  -0.090    0.929
## race        -0.004672   0.099663  -0.047    0.963
## pmi         -0.004291   0.005348  -0.802    0.427
## 
## Residual standard error: 0.1626 on 43 degrees of freedom
## Multiple R-squared:  0.04796,    Adjusted R-squared:  -0.06275 
## F-statistic: 0.4332 on 5 and 43 DF,  p-value: 0.8229
summary(lm(micro_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = micro_ihc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42155 -0.18108  0.00904  0.12077  0.49763 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.669486   0.661677  -1.012    0.317
## groupCTL    -0.020876   0.072634  -0.287    0.775
## age         -0.001193   0.007227  -0.165    0.870
## batch1       0.061551   0.067749   0.909    0.369
## race        -0.090957   0.139186  -0.653    0.517
## pmi          0.009101   0.007469   1.219    0.230
## 
## Residual standard error: 0.2271 on 43 degrees of freedom
## Multiple R-squared:  0.0605, Adjusted R-squared:  -0.04875 
## F-statistic: 0.5538 on 5 and 43 DF,  p-value: 0.7346

13.6 Check differential abundance of deconvolved cell-types in this subset

13.6.1 Munge

ctp_pheno_ihc_hseq_rmoligo_pheno <- inner_join(ctp_pheno_ihc_hseq_rmoligo, pheno, by = "IID")

table(ctp_pheno_ihc_hseq_rmoligo_pheno$group)
## 
## AZD CTL 
##  18  31
keep_pheno_col <- ctp_pheno_ihc_hseq_rmoligo_pheno %>% dplyr::select(all_of(keep_cols))
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr <- as.matrix(ctp_pheno_ihc_hseq_rmoligo_pheno %>% dplyr::select(c("hseq_Exc", "hseq_Inh", "hseq_Astro", "hseq_Endo", "hseq_Micro", "hseq_OPC")))

# Make minimum CTP = 1e-3
length(which(ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 0
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0 <- ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0[which(ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0.tr <- clr(ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0)
ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3 <- data.frame(keep_pheno_col, ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr0.tr)

13.6.2 Linear models

  • leave out sex as a covariate, as all n=49 samples here with IHC were male
summary(lm(hseq_Exc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Exc ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34293 -0.08552  0.01162  0.07871  0.56493 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.30559    0.04196  31.115   <2e-16 ***
## groupCTL     0.03805    0.05275   0.721    0.474    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.178 on 47 degrees of freedom
## Multiple R-squared:  0.01095,    Adjusted R-squared:  -0.0101 
## F-statistic: 0.5202 on 1 and 47 DF,  p-value: 0.4743
summary(lm(hseq_Inh ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Inh ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.133640 -0.054484 -0.008477  0.053479  0.290462 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.01618    0.02059  49.344   <2e-16 ***
## groupCTL     0.01731    0.02589   0.668    0.507    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08737 on 47 degrees of freedom
## Multiple R-squared:  0.009416,   Adjusted R-squared:  -0.01166 
## F-statistic: 0.4467 on 1 and 47 DF,  p-value: 0.5072
summary(lm(hseq_Astro ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Astro ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25131 -0.07718 -0.03181  0.09109  0.47219 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.07528    0.03386  31.753   <2e-16 ***
## groupCTL    -0.02187    0.04258  -0.514     0.61    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1437 on 47 degrees of freedom
## Multiple R-squared:  0.005581,   Adjusted R-squared:  -0.01558 
## F-statistic: 0.2638 on 1 and 47 DF,  p-value: 0.6099
summary(lm(hseq_Endo ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Endo ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.168696 -0.059976 -0.002166  0.053907  0.270680 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.64656    0.01897 -34.082   <2e-16 ***
## groupCTL    -0.02342    0.02385  -0.982    0.331    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08049 on 47 degrees of freedom
## Multiple R-squared:  0.02011,    Adjusted R-squared:  -0.0007413 
## F-statistic: 0.9644 on 1 and 47 DF,  p-value: 0.3311
summary(lm(hseq_Micro ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Micro ~ group, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39353 -0.08995  0.00467  0.10263  0.24708 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.873426   0.032502 -26.873   <2e-16 ***
## groupCTL    -0.004447   0.040862  -0.109    0.914    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1379 on 47 degrees of freedom
## Multiple R-squared:  0.000252,   Adjusted R-squared:  -0.02102 
## F-statistic: 0.01185 on 1 and 47 DF,  p-value: 0.9138
summary(lm(hseq_Exc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Exc ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39429 -0.06578  0.01748  0.08145  0.52352 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.997912   0.496730   2.009   0.0508 .
## groupCTL     0.046405   0.054527   0.851   0.3995  
## age          0.003187   0.005425   0.588   0.5599  
## batch1      -0.129686   0.050860  -2.550   0.0144 *
## race         0.091031   0.104489   0.871   0.3885  
## pmi          0.001836   0.005607   0.327   0.7449  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1705 on 43 degrees of freedom
## Multiple R-squared:  0.1701, Adjusted R-squared:  0.07362 
## F-statistic: 1.763 on 5 and 43 DF,  p-value: 0.141
summary(lm(hseq_Inh ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Inh ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.141550 -0.069167 -0.000423  0.044592  0.273307 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.1469562  0.2594565   4.421 6.58e-05 ***
## groupCTL     0.0082869  0.0284813   0.291    0.772    
## age         -0.0014492  0.0028337  -0.511    0.612    
## batch1      -0.0270007  0.0265658  -1.016    0.315    
## race         0.0004041  0.0545774   0.007    0.994    
## pmi          0.0021231  0.0029288   0.725    0.472    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08905 on 43 degrees of freedom
## Multiple R-squared:  0.05858,    Adjusted R-squared:  -0.05088 
## F-statistic: 0.5352 on 5 and 43 DF,  p-value: 0.7484
summary(lm(hseq_Astro ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Astro ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34161 -0.05518 -0.00325  0.06722  0.38626 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.002190   0.382236   2.622  0.01204 * 
## groupCTL    -0.009987   0.041959  -0.238  0.81300   
## age          0.001555   0.004175   0.372  0.71136   
## batch1      -0.135362   0.039137  -3.459  0.00124 **
## race         0.038974   0.080404   0.485  0.63033   
## pmi         -0.004336   0.004315  -1.005  0.32053   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1312 on 43 degrees of freedom
## Multiple R-squared:  0.2415, Adjusted R-squared:  0.1533 
## F-statistic: 2.738 on 5 and 43 DF,  p-value: 0.03111
summary(lm(hseq_Endo ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Endo ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.164800 -0.050172  0.008049  0.036296  0.205258 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.5023769  0.2165868  -2.320   0.0252 * 
## groupCTL    -0.0182668  0.0237753  -0.768   0.4465   
## age         -0.0007989  0.0023655  -0.338   0.7372   
## batch1      -0.0639759  0.0221764  -2.885   0.0061 **
## race        -0.0089890  0.0455597  -0.197   0.8445   
## pmi         -0.0050250  0.0024449  -2.055   0.0460 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07434 on 43 degrees of freedom
## Multiple R-squared:  0.2353, Adjusted R-squared:  0.1464 
## F-statistic: 2.646 on 5 and 43 DF,  p-value: 0.03586
summary(lm(hseq_Micro ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3))
## 
## Call:
## lm(formula = hseq_Micro ~ group + age + batch + race + pmi, data = ctp_pheno_ihc_hseq_rmoligo_pheno.tmp.clr.1e3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34702 -0.08913 -0.00494  0.10977  0.25531 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.9628320  0.4085811  -2.357   0.0231 *
## groupCTL    -0.0158817  0.0448511  -0.354   0.7250  
## age          0.0004300  0.0044624   0.096   0.9237  
## batch1       0.0268570  0.0418347   0.642   0.5243  
## race         0.0007982  0.0859462   0.009   0.9926  
## pmi          0.0067727  0.0046121   1.468   0.1493  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1402 on 43 degrees of freedom
## Multiple R-squared:  0.05404,    Adjusted R-squared:  -0.05596 
## F-statistic: 0.4913 on 5 and 43 DF,  p-value: 0.7809